library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
library(here)
here() starts at /Users/avrilwang/Desktop/Project-Plasmodium
library(deSolve)
library(crone)
library(optimParallel)
Loading required package: parallel
library(doParallel)
Loading required package: foreach
Loading required package: iterators
library(doRNG)
Loading required package: rngtools
library(arrow)
Attaching package: ‘arrow’
The following object is masked from ‘package:utils’:
timestamp
library(stringr)
library(parallel)
library(ggpubr)
library(scales)
Notebook for plotting all of the figures for PloS Biology manuscript submission
Guidelines: taken from https://journals.plos.org/plosbiology/s/figures#loc-figure-file-requirements 1. format: eps 2. max file size: 10 MB 3. text size: Arial, Times, or Symbol font only in 8-12 point 2. figure size: Width: 789 – 2250 pixels (at 300 dpi). Height maximum: 2625 pixels (at 300 dpi).
#=========================================# figure 1: best single and co-infection cue #=========================================# Figure displaying the reaction norms of best single and co-infection.
#——- optimal cue reaction norm ———–# # read data
process data for reaction norm
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))
# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>%
mutate(label_si = case_when(
label %in% c("I", "I1+I2") ~ "I",
label %in% c("I log","I1+I2 log") ~ "I log",
label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
label %in% c("Ig log") ~ "Ig log",
label %in% c("sum", "I+Ig") ~ "I+Ig",
label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
label == "R" ~ "R",
label == "R log" ~ "R log",
label %in% c("G", "G1+G2") ~ "G",
label == "G log" ~ "G log"
))
# get limit for si_rug
si_rug_lim.df <- si_rug.df %>%
filter(time <= 20) %>%
group_by(label)%>%
summarise(min = min(value, na.rm = T)*0.9,
max = max(value, na.rm = T)*1.1) %>%
select(label_si = label, min_si = min, max_si = max)
# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>%
group_by(label) %>%
mutate(min = min(value, na.rm = T)*0.9,
max = max(value, na.rm = T)*1.1) %>%
distinct(label, .keep_all = T) %>%
select(label, label_si, min, max)
rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>%
mutate(final_min = min(min, min_si),
final_max = max(max, max_si))
# get second rug_lim.final for single infection
rug_lim.final2 <- rug_lim.final %>%
group_by(label_si) %>%
mutate(final_min = min(final_min, na.rm = T),
final_max = max(final_max, na.rm = T))
# filter ci_rn by limit
ci_rn.df2 <- ci_rn.df %>%
left_join(rug_lim.final, by = "label") %>%
group_by(label) %>%
filter(cue_range <= final_max & cue_range >= final_min) %>%
arrange(cue_range, .by_group = T) %>%
filter(row_number() %% 10 == 0)
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))
match single infection rn with coinfection
# get ci label to si rug and filter by limit
si_rn.df2 <- left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c("label" = "label_si")) %>%
left_join(rug_lim.final, by = c("label_ci" = "label")) %>%
group_by(label_ci) %>%
filter(cue_range <= final_max & cue_range >= final_min) %>%
arrange(cue_range, .by_group = T) %>%
filter(row_number() %% 10 == 0) %>%
select(cue_range, cr, label_ci, label_si)
Warning in left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c(label = "label_si")) :
Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
# get ci label to si rug, we will keep one unique value per label
si_rug.df2 <- select(si_rug.df, value, label_si = label) %>% distinct(value, label_si)
plot reaction norm
# join with ezlabel
ci_rn.df3 <- ci_rn.df2 %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rn.df3 <- si_rn.df2 %>% left_join(ez_label, by = "label_ci")
ci_rug.df3 <- ci_rug.df %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rug.df3 <- si_rug.df2 %>% left_join(ez_label, by = "label_si")
Warning in left_join(., ez_label, by = "label_si") :
Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 27002 of `x` matches multiple rows in `y`.
ℹ Row 12 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
# redo order of cues
ci_rn.df3$ez_label <- factor(ci_rn.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
si_rn.df3$ez_label <- factor(si_rn.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
ci_rug.df3$ez_label <- factor(ci_rug.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
si_rug.df3$ez_label <- factor(si_rug.df3$ez_label,
levels = c("Asexual iRBC", "Asexual iRBC log10",
"Total asexual iRBC", "Total asexual\niRBC log10",
"Sexual iRBC", "Sexual iRBC log10",
"Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
"Total iRBC", "Total iRBC log10",
"Gametocyte", "Gametocyte log10",
"Total gametocyte", "Total sexual iRBC",
"RBC", "RBC log10"))
# plot
ggplot() +
geom_line(data = ci_rn.df3, aes(x = cue_range, y = cr, color = "Co-infection")) +
geom_point(data = ci_rn.df3 %>%
group_by(label) %>%
mutate(ten_th = round(n()/10)) %>%
filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Co-infection", shape = "Co-infection"), size = 2) +
geom_line(data = si_rn.df3, aes(x = cue_range, y = cr, color = "Single infection")) +
geom_point(data = si_rn.df3 %>%
group_by(label_ci) %>%
mutate(ten_th = round(n()/10)) %>%
filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Single infection", shape = "Single infection"), size = 2) +
geom_rug(data = ci_rug.df3, aes(x = value), color = "#4575b4", sides = "t", length = unit(0.1, "npc")) +
geom_rug(data = si_rug.df3, aes(x = value), color = "#fc8d59", sides = "b", length = unit(0.1, "npc")) +
facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
ylim(-0.3, 1.3) +
theme_bw() +
labs(y = "Conversion rate", x = "Cue range", color = "Model", shape = "Model") +
scale_x_continuous(labels = function(x) format(x, scientific = T),
guide = guide_axis(check.overlap = TRUE)) +
theme(axis.text.x = element_text(size = 7),
legend.position = "right") +
scale_color_manual(values=c( "#4575b4", "#fc8d59")) +
theme(strip.text.x = element_text(margin = margin(b = 0.5, t = 0.5)))
ggsave(units = "px", dpi = 300, width = 2000, height = 2500, filename = here("figures/plos-bio/reaction_norm.tiff"), bg = "white", scale = 1.1)

#=========================================# Plotting single and co-infection fitness scatter plot #=========================================# # import in data
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
# co-infection dynamics
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
ez_label <- read.csv(here("data/ez_label.csv"))
process for final 20 days fitness
d
Error: object 'd' not found
plot scatter point of single infection vs co-infection
si_ci_fitness.pl <- ggplot() +
geom_point(data = si_ci_fitness.df, aes(x = fitness_si, y = fitness_ci, color = ez_label_si, shape = ez_label_si), size = 3.5) +
ggrepel::geom_label_repel(data = si_ci_fitness.df, aes(label = ez_label, x = fitness_si, y = fitness_ci),
fill = "white",xlim = c(-Inf, Inf), ylim = c(NA, NA)) +
labs(x = "Maximum single infection fitness", y = "Maximum Co-infection fitness (per strain)",
color = "Single infection cue", shape = "Single infection cue") +
scale_shape_manual(values = 15:25) +
lims(x = c(8, 10.3)) +
theme_bw()
#=========================================================# # time series conversion rate for single and co-infection #=========================================================# #———get “ideal” single infection and co-infection dynamics———# This is when stuff are optimized based on time
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/chabaudi_ci_clean.R"))
# single infection dynamic with time as cue. Optimized using local optimizer. Note that the time variable in dual cue is slightly different with higher flexibility. While that increases the fitness value by ~0.1, the overall conversion rate dynamic does not change that much
si_t.df <- chabaudi_si_clean(
parameters_cr = c(4.55386, -13.0056, 4.15466, -11.9424),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = seq(0, 20, by = 1e-3),
cue = "t",
solver = "vode",
dyn = T)
# co-infection dynamic with time as cue
ci_t.df <- chabaudi_ci_clean(parameters_cr_1 = c(26.16425, -71.07799, 53.34121, -166.25693),
parameters_cr_2 = c(26.16425, -71.07799, 53.34121, -166.25693),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = time_range,
cue_1 = "t",
cue_2= "t",
cue_range_1 = time_range, # cue range of strain 1
cue_range_2 = time_range, # cue range of strain 2
log_cue_1 = "none", # whether to log transform cue 1
log_cue_2 = "none", # whether to log transform cue 2
solver = "vode", # solver for numerical integration. Vode often gives faster runs
dyn = T)
# get only conversion rate and make same format as si_cr.df2/ci_cr.df2
si_t.df %>% filter(time == 20 & variable == "tau_cum") ## fitness value of time as cue in single infection
si_t.cr <- si_t.df %>%
filter(variable == "cr") %>%
mutate(ez_label_si = "Time",
fitness_si =9.787899) %>%
select(time, value, fitness_si, ez_label_si)
## get fitness value of co-infection time
ci_t.df %>% filter(variable == "tau_cum1") %>% summarize(max = max(value, na.rm = T))
ci_t.cr <- ci_t.df %>%
filter(variable == "cr_1") %>%
mutate(ez_label = "Time",
fitness_ci = 2.311841) %>%
select(time, value, fitness_ci, ez_label)
#——— single infection conversion rate heat map————–# # process info for single infection
—————–co-infection conversion rate heatmap———–
plot co-infeciton convesion rate heatmap
ggplot() +
geom_raster(data = ci_cr.df2,
aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
labs(x = "Time (days)", y = "Co-infection cues", fill = "Conversion rate") +
viridis::scale_fill_viridis(limits = c(0, 1), , oob = scales::squish) +
xlim(1, 20) +
theme_bw()
Warning: Removed 17028 rows containing missing values (`geom_raster()`).

#——— assemble final figure ————–#
# combine conversion rate dynamic of single infection and co-infection
cr.pl <- ggarrange(si_cr.pl, ci_cr.pl, labels = c("B", "C"), ncol = 2, common.legend = T, align = "h")
# combine with fitness scatterplot
ggarrange(si_ci_fitness.pl, cr.pl, ncol = 1, labels = c("A", ""), align = "hv")
# save
ggsave(units = "px", dpi = 300, width = 2000, height = 2000, filename = here("figures/plos-bio/fitness_cr-dyn.tiff"), bg = "white", scale = 1.35)
#===========================================================# # Demographic stochasticity #===========================================================# #———- plot heat map—————# # import in all fitness files
file_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv", full.names = T)
name_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv")
name_ls <- gsub("*.csv", "", name_ls)
# 60, which is about right
length(file_ls)
[1] 60
# read in files
fitness.ls <- lapply(file_ls, read.csv)
Warning: stack imbalance in 'lapply', 6 then 8
Warning: stack imbalance in '<-', 2 then 4
# assign unique ID
fitness.ls <- mapply(cbind, fitness.ls, "ID" = name_ls, SIMPLIFY = F)
process data
# get metainfo from ID
fitness.ls2 <- mclapply(fitness.ls, function(x){
id_col <- x$ID
# string split to extract all info
cue <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
log <- unlist(str_split(unique(id_col), pattern = "_"))[[4]]
rand_var <- unlist(str_split(unique(id_col), pattern = "_"))[[5]]
# get mean
mean_fitness <- mean(x$max_fitness)
# get sd
sd_fitness <- sd(x$max_fitness)
# bind results
res <- cbind(x, cue= cue, log = log, rand_var = rand_var, mean_fitness = mean_fitness, sd_fitness = sd_fitness)
return(res)
})
Get reference data
reference_ls <- list.files(path = here("data/MC2"), pattern = "*.csv", full.names = T)
reference_name.ls <- gsub("*.csv", "", list.files(path = here("data/MC2/"), pattern = "*.csv"))
# read in the files
reference.ls <- lapply(reference_ls, read.csv)
# assign unique ID
reference.ls <- mapply(cbind, reference.ls, "ID" = reference_name.ls, SIMPLIFY = F)
# get meta data
reference.ls2 <- mclapply(reference.ls, function(x){
id_col <- x$ID
# string split to extract all info
cue <- unlist(str_split(unique(id_col), pattern = "_"))[[2]]
# get log
third_col <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
log <- ifelse(third_col == "log", "log10", "none")
# get mean
mean_fitness <- mean(x$max_fitness)
# get sd
sd_fitness <- sd(x$max_fitness)
# bind results
res <- cbind(x, cue= cue, log = log, rand_var = "all", ref_mean_fitness = mean_fitness, ref_sd_fitness = sd_fitness)
return(res)
})
combine MC partitioned and reference df
# get unique column values for each cue, log, and rand_var combo
fitness.ls3 <- do.call(rbind, fitness.ls2)
fitness.ls3 <- fitness.ls3 %>% dplyr::distinct(ID, .keep_all = T)
# repeat with reference
reference.ls3 <- do.call(rbind, reference.ls2)
reference.ls3 <- reference.ls3 %>% dplyr::distinct(ID, .keep_all = T)
# combine!
ref_fit.df <- left_join(fitness.ls3, reference.ls3, by = c("cue" = "cue", "log"= "log"))
compute proportion fitness and variation
ref_fit.df2 <- ref_fit.df %>%
mutate(p_sd = sd_fitness/ref_sd_fitness,
p_mean = ref_mean_fitness/mean_fitness,
cue_log = paste0(cue, "_", log),
label = case_when(
cue == "G" ~ "Gametocyte",
cue == "I" ~ "Asexual iRBC",
cue == "I+Ig" ~ "Asexual&sexual\niRBC",
cue == "Ig" ~ "Sexual iRBC",
cue == "R" ~ "RBC"
),
parameter = case_when(
rand_var.x == "rho" ~ "ρ",
rand_var.x == "phin" ~ "ϕn",
rand_var.x == "phiw"~ "ϕw",
rand_var.x == "psin" ~ "ψn",
rand_var.x == "psiw" ~ "ψw",
rand_var.x == "beta" ~ "β"
))
plot!
# variation
mc_b <- ggplot() +
geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_sd)) +
facet_wrap(~log) +
theme_bw() +
viridis::scale_fill_viridis() +
labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(sd("1 parameter randomized"), sd("all parameters randomized")))) +
theme(legend.position="top",
axis.text.x = element_text(angle = 45, hjust=1))
# mean fitness
mc_c <- ggplot() +
geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_mean)) +
facet_wrap(~log) +
theme_bw() +
viridis::scale_fill_viridis() +
labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(Mean("all parameters randomized"), Mean("1 parameter randomized")))) +
theme(legend.position="top",
axis.text.x = element_text(angle = 45, hjust=1))
mc_partition <- ggarrange(mc_b, mc_c, ncol = 1)
#————– get violine plot of variation in fitness ——————–# # read MC data
# read in dymamics
mc_G_log.dyn <- read_parquet(here("data/MC2/mc_G_log_dyn.parquet"))
mc_G.dyn <- read_parquet(here("data/MC2/mc_G_dyn.parquet"))
mc_R_log.dyn <- read_parquet(here("data/MC2/mc_R_log_dyn.parquet"))
mc_R.dyn <- read_parquet(here("data/MC2/mc_R_dyn.parquet"))
mc_I_log.dyn <- read_parquet(here("data/MC2/mc_I_log_dyn.parquet"))
mc_I.dyn <- read_parquet(here("data/MC2/mc_I_dyn.parquet"))
mc_Ig_log.dyn <- read_parquet(here("data/MC2/mc_Ig_log_dyn.parquet"))
mc_Ig.dyn <- read_parquet(here("data/MC2/mc_Ig_dyn.parquet"))
mc_I_Ig_log.dyn <- read_parquet(here("data/MC2/mc_I+Ig_log_dyn.parquet"))
mc_I_Ig.dyn <- read_parquet(here("data/MC2/mc_I+Ig_dyn.parquet"))
# read in fitness
mc_G_log.fitness <- read.csv(here("data/MC2/mc_G_log_fitness.csv"))
mc_G.fitness <- read.csv(here("data/MC2/mc_G_fitness.csv"))
mc_R_log.fitness <- read.csv(here("data/MC2/mc_R_log_fitness.csv"))
mc_R.fitness <- read.csv(here("data/MC2/mc_R_fitness.csv"))
mc_I_log.fitness <- read.csv(here("data/MC2/mc_I_log_fitness.csv"))
mc_I.fitness <- read.csv(here("data/MC2/mc_I_fitness.csv"))
mc_Ig_log.fitness <- read.csv(here("data/MC2/mc_Ig_log_fitness.csv"))
mc_Ig.fitness <- read.csv(here("data/MC2/mc_Ig_fitness.csv"))
mc_I_Ig_log.fitness <- read.csv(here("data/MC2/mc_I+Ig_log_fitness.csv"))
mc_I_Ig.fitness <- read.csv(here("data/MC2/mc_I+Ig_fitness.csv"))
examine variation
# plot fitness vs iteration
fitness.df <- rbind(
cbind(mc_G_log.fitness, id = "Gametocyte\nlog10"),
cbind(mc_G.fitness, id = "Gametocyte"),
cbind(mc_R_log.fitness, id = "RBC log10"),
cbind(mc_R.fitness, id = "RBC"),
cbind(mc_I_log.fitness, id = "Asexual iRBC\nlog10"),
cbind(mc_I.fitness, id = "Asexual iRBC"),
cbind(mc_Ig_log.fitness, id = "Sexual iRBC\nlog10"),
cbind(mc_Ig.fitness, id = "Sexual iRBC"),
cbind(mc_I_Ig_log.fitness, id = "Asexual&sexual iRBC\nlog10"),
cbind(mc_I_Ig.fitness, id = "Asexual&sexual\niRBC")
)
# quantify variance and mean
fitness_var.df <- fitness.df %>%
dplyr::group_by(id) %>%
dplyr::summarise(median = median(max_fitness)) %>%
dplyr::mutate(id = forcats::fct_reorder(id, median))
plot violin with difference in deterministic model fitness and mean model fitness
# get deterministic df
det.df <- data.frame(fitness_var.df, `Maximum fitness` = c(8.49777, 9.494991,8.854682,9.573291,8.58856,9.561373,8.23991,8.181604,8.569285,9.618812)) %>%
dplyr::mutate(id = forcats::fct_reorder(id, median)) %>%
tidyr::pivot_longer(-id) %>%
mutate(classification = case_when(
name == "Maximum.fitness" ~"Optimal single infection",
name == "median" ~"Median Monte Carlo",
name == "mean" ~ "Mean Monte Carlo"))
mc_a <- ggplot() +
geom_point(data = det.df, aes(y = id, x = value, shape = classification, color = classification), size = 3, alpha = 0.8) +
geom_violin(data = fitness.df, aes(y = id, x = max_fitness), fill = "transparent") +
labs(x = "Fitness", y = "Cue", color = "Fitness", shape = "Fitness") +
theme_bw() +
theme(legend.position = "bottom") +
scale_color_manual(values=c("black", "#4575b4", "#fc8d59"))
#————– plot together———————–#
# arrange the heat map
ggarrange(mc_a, mc_partition, ncol = 2, nrow = 1, labels = c("A", "B"), align = "h")
Warning: Graphs cannot be horizontally aligned unless the axis parameter is set. Placing graphs unaligned.

#——————————————–# # dual cue optimization figure #——————————————–#
source(here("functions/chabaudi_si_clean_high.R"))
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/par_to_hm_te.R"))
#———- plotting fitness of dual vs single cue opt ———# # import in previous data
# dual cue fitness
dual_fitness.df <- read.csv(here("data/dual_cue_opt4/dual_cue_fitness_20.csv"))
## make label and filter out very low fitness
dual_fitness.df <- dual_fitness.df %>%
mutate(temp_label = gsub("log", "log10", label),
temp_label_b = gsub("log", "log10", label_b),
label_final = paste0(temp_label, "+", temp_label_b)) %>%
filter(value > 2)
# get single cue fitness
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
si_fitness.df <- si_dyn.df %>%
filter(variable == "tau_cum" & time == 20)
# join si and dual cue
dual_si_fitness.df <- dual_fitness.df %>%
left_join(select(si_fitness.df, id, si_fitness = value), by = "id") %>%
left_join(select(si_fitness.df, id_b = id, si_fitness_b = value), by = "id_b") %>%
mutate(si_fitness_max = ifelse(si_fitness > si_fitness_b, si_fitness, si_fitness_b),
dual_label = gsub("log", "log10", paste(label, "+", label_b)))
plot
dual_si_fitness.pl <- ggplot() +
geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = value, color = "Dual cue", shape = "Dual cue"), size = 3, alpha = 0.7) +
geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = si_fitness_max, color= "Best single cue", shape= "Best single cue"), size = 3, alpha = 0.7) +
labs(x = "Fitness", y = "Dual cue", color = "Cue", shape = "Cue") +
scale_color_manual(values=c("#fc8d59", "#4575b4")) +
geom_vline(xintercept = 9.883602) +
geom_text(aes(x=9.883602, label="\nTime-based fitness (df=9)", y = "G + R log10"), angle=90) +
xlim(8.35, 10) +
theme_bw() +
theme(legend.position = "top")
#———– time series conversion rate ————-# # dynamics simulation of high parameter cues (these serve as reference points)
# best dual cue combo
dual.cr <- chabaudi_si_clean(
parameters_cr = c(4.446192033, 10.97518275, 1.38762817, 23.3059254, -3.452052371, -18.0070692, 39.66614226, -3.545193141, 18.78350799),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = seq(6, 7, by = 1/500),
cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
cue = "R",
cue_b = "I",
log_cue = "log10",
log_cue_b = "log10",
solver = "vode",
dyn = T
)
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
*** recursive gc invocation
Warning in Predict.matrix.cr.smooth(object$margin[[i]], dat) :
passing an object of type 'language' to .C (arg 8) is deprecated
plot
dual_cr.pl <- ggplot() +
geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 3) +
labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
xlim(0, 20) +
scale_color_manual(values = c("#fc8d59","#fdcb44","black", "#4575b4")) +
theme_bw() +
theme(legend.position="right",
plot.margin = margin(t = 40, r = 0, b = 0, l = 0, unit = "pt")) +
guides(color = guide_legend(nrow = 4, byrow = TRUE))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
#———— reaction norm heatmap of R log10 + I log10 ————# # process data
# make heatmap df
R_I.hm <- par_to_hm_te(par = c(4.446192033, 10.97518275, 1.38762817, 23.3059254, -3.452052371, -18.0070692, 39.66614226, -3.545193141, 18.78350799),
cue_range = seq(6, 7, length.out = 500),
cue_range_b = seq(0, 6.77815125, length.out = 500))
# process dynamics
R_I.dyn <- dual.cr %>%
tidyr::pivot_wider(names_from = variable, values_from = value) %>%
mutate(log_R = log10(R),
log_I = log10(I))
# examine sexual iRBC as well
R_Ig.dyn <- dual.cr %>%
tidyr::pivot_wider(names_from = variable, values_from = value) %>%
mutate(log_R = log10(R),
log_Ig = log10(Ig))
max(R_Ig.dyn$Ig)
[1] 175754.2
plot
dual_rn.pl <- ggplot() +
geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
scale_fill_viridis_c() +
geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion\nrate") +
theme_dark() +
theme(legend.position = "right")
# just testing for sexual iRBC vs RBC
ggplot() +
geom_path(data = R_Ig.dyn, aes(x = Ig, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
geom_point(data = R_Ig.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = Ig, y = log_R), color = "white") +
scale_x_continuous(trans = "log10") +
xlim(0, 200000) +
labs(y = "RBC log10", x = "Sexual iRBC log10", fill = "Conversion rate") +
theme_dark()
Scale for x is already present.
Adding another scale for x, which will replace the existing scale.

save figure for poster
dual_rn.pl2 <- ggplot() +
geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
scale_fill_viridis_c() +
geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
theme_dark() + theme(legend.position="top")
dual_cr.pl2 <- ggplot() +
geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 2) +
labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
xlim(0, 20) +
scale_color_manual(values = c("#4575b4", "#91bfdb","#fc8d59","#fdcb44")) +
theme_bw() +
theme(legend.position="top") +
guides(color = guide_legend(nrow = 2, byrow = TRUE))
ggarrange(dual_cr.pl2, dual_rn.pl2, align = "h", widths = c(1.25, 1))
ggsave(here("poster/dual_cue.png"), width = 7, height = 4)
#——– assemble final figure ————-#
# assemble panel B and C
dual_pl.BC <- ggarrange(dual_cr.pl, dual_rn.pl, align = "v", ncol = 1, labels = c("B", "C"))
Warning: Removed 3 rows containing missing values (`geom_line()`).
Warning: Removed 3 rows containing missing values (`geom_point()`).
Warning: Removed 219202 rows containing missing values (`geom_raster()`).
# assemble panel A
ggarrange(dual_si_fitness.pl, dual_pl.BC, ncol = 2, labels = c("A", ""), widths = c(1,1.1))
ggsave(here("figures/plos-bio/dual_cue.tiff"), units = "px", width = 2100, height = 1400, scale = 1.3, dpi=300, bg = "white")

#============================================# # dynamics of dual cue simulation (all combinations) #============================================# # conversion rate
ggsave(here("figures/plos-bio/dual_cue_cr.tiff"), units = "px", width = 2000, height = 1500, dpi=300, bg = "white")
Warning: Removed 26448 rows containing missing values (`geom_raster()`).
cumultative transmission transmission potential
Proving a point to myself
# it seems that R log+I log only inches ahead of other at the very end of the infection, suggesting that terminal investment is at play
ggplot(data = dual_tau.df_p, aes(x = time, y = value, color = label_plot)) +
geom_line() +
scale_fill_viridis_c() +
xlim(15, 20) +
ylim(5,10) +
labs(x = "Time (days)", y = "Dual cue combination", fill = "Conversion rate") +
theme_bw()
Warning: Removed 12500 rows containing missing values (`geom_line()`).

#============================================# # get dual cue disease map (simulated) #============================================# I am going to take the optimal dynamic (R log10 + I log10) and plot all possible disease curves and see if any follow the hysteresis curve. See below for real life disease curve.
plot
ggarrange(plotlist = dual_curve.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve1.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300, bg = "white")

ggarrange(plotlist = dual_curve_xlog.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve2.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300, bg = "white")

ggarrange(plotlist = dual_curve_ylog.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve3.tiff"), units = "px", width = 1000, height =1000, scale = 1.5, dpi=300, bg = "white")

ggarrange(plotlist = dual_curve_log.pl, ncol = 2, nrow = 3, align = "hv")
Warning: Removed 1 row containing missing values (`geom_path()`).
Warning: Removed 1 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 37 rows containing missing values (`geom_path()`).
Warning: Removed 4 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
Warning: Removed 57 rows containing missing values (`geom_path()`).
Warning: Removed 6 rows containing missing values (`geom_point()`).
ggsave(here("figures/plos-bio/dual_curve4.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300, bg = "white")

#============================================# # real life disease curve #============================================# # execute code from report 10 to get final dataset The following graphs will be made: - R vs iRBC - R log10 vs iRBC - R vs iRBC log10 - R log10 vs iRBC log10
G vs iRBC
G log10 vs iRBC
G vs iRBC log10
G log10 vs iRBC log10
R vs G
R log10 vs G
R vs G log10
R log10 vs G log10
R on y-axis
r_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = asex)) +
geom_path(aes(y = RBC, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = asex)) +
geom_path(aes(y = log10(RBC), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
r_ilog.dc <-ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = log10(asex))) +
geom_path(aes(y = RBC, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_ilog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = log10(asex))) +
geom_path(aes(y = log10(RBC), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
G on y-axis
g_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = gam, x = asex)) +
geom_path(aes(y = gam, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
glog_i.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(gam), x = asex)) +
geom_path(aes(y = log10(gam), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "iRBC per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
g_ilog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = gam, x = log10(asex))) +
geom_path(aes(y = gam, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
glog_ilog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(gam), x = log10(asex))) +
geom_path(aes(y = log10(gam), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(iRBC) per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
R vs G
r_g.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = gam)) +
geom_path(aes(y = RBC, x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "Gametocyte per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_g.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = gam)) +
geom_path(aes(y = log10(RBC), x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "Gametocyte per µL", y = "log10(RBC per µL)", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
r_glog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = RBC, x = log10(gam))) +
geom_path(aes(y = RBC, x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(Gametocyte) per µL", y = "RBC per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
rlog_glog.dc <- ggplot(exp_ss.df) +
geom_point(aes(y = log10(RBC), x = log10(gam))) +
geom_path(aes(y = log10(RBC), x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
theme_bw() +
scale_color_viridis_c(limits = c(3, 21)) +
labs(x = "log10(Gametocyte) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
plot together


#===========================================# # static competition #===========================================# #——- heat map —————# # calculate fitness difference for 20 days
# get dynamics
static.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static.ls <- lapply(static.ls, read_parquet)
# get fitness at day 20 (optimized for 20 days)
static_fitness.ls <- mclapply(static.ls, function(x){
x %>% filter(time == 20 & variable %in% c("tau_cum1", "tau_cum2"))
})
static_fitness.df <- do.call(rbind, static_fitness.ls)
static_fitness.df <- tidyr::pivot_wider(static_fitness.df, names_from = "variable", id_cols = c("id_1", "id_2")) %>%
group_by(id_1, id_2) %>%
mutate(fitness_difference = tau_cum1-tau_cum2)
write.csv(static_fitness.df, here("data/ci_static.csv"))
import and process data
ez_label <- read.csv(here("data/ez_label.csv"))
ez_label <- read.csv(here("data/ez_label.csv"))
# join with labelling
static.df2 <- static.df %>%
left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("id_1" = "id_ci")) %>%
left_join(select(ez_label, id_ci, label_ci_2 = label_ci), by = c("id_2" = "id_ci")) %>%
select(label_ci_1, label_ci_2, fitness_difference) %>%
mutate(label_ci_1 = gsub("log", "log10", label_ci_1),
label_ci_2 = gsub("log", "log10", label_ci_2))
Error in left_join(., select(ez_label, id_ci, label_ci_1 = label_ci), :
object 'static.df' not found
plot
ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
Warning: Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
Warning: Graphs cannot be vertically aligned unless the axis parameter is set. Placing graphs unaligned.
ggsave(here("figures/plos-bio/static_competition_a.tiff"), units = "px", width = 2250, height = 1500, scale = 1.2, dpi=300, bg = "white")

#—— effect cue perception ——-# ## logging
# get non-logged pairings
static_nolog <- static.df2 %>%
mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none")) %>%
filter(log_1 == "none")
static_log <- static.df2 %>%
mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none")) %>%
filter(log_1 == "log")
static_log.df <- left_join(
select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_difference),
select(static_log, cue_1, label_ci_2, log_1, Log = fitness_difference),
by = c("cue_1", "label_ci_2")) %>%
filter(!is.na(None) & !is.na(Log)) %>%
mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
combined
static_nocomb <- static.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "none")
static_comb <- static.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "comb")
static_comb.df <- left_join(
select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_difference),
select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_difference),
by = c("cue_1", "log_1", "label_ci_2")) %>%
filter(!is.na(Total) & !is.na(Self)) %>%
mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
plot

ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggsave(here("figures/plos-bio/static_competition_b.tiff"), units = "px", width = 1000, height = 2000, scale = 1.2, dpi=300, bg = "white")

#===========================================# static competition dynamics #===========================================# #———–G log10————#
# plot together
Glog_dyn.pl <- ggarrange(Glog_cr.pl, Glog_I.pl, Glog_G.pl, Glog_tau.pl, ncol = 1, align = "v")
Warning: Removed 272032 rows containing missing values (`geom_raster()`).
Warning: Removed 272032 rows containing missing values (`geom_raster()`).
Warning: Removed 272032 rows containing missing values (`geom_raster()`).
Warning: Removed 272032 rows containing missing values (`geom_raster()`).
#———–R as cue———–#
R_dyn.pl <- ggarrange(R_cr.pl, R_I.pl, R_R.pl, R_tau.pl, ncol = 1, align = "v")
Warning: Removed 238028 rows containing missing values (`geom_raster()`).
Warning: Removed 238028 rows containing missing values (`geom_raster()`).
Warning: Removed 238028 rows containing missing values (`geom_raster()`).
Warning: Removed 238028 rows containing missing values (`geom_raster()`).
plot together

#===========================================# # invasion analysis #===========================================# # import in data (already 20 days )
invade.df <- read.csv(here("data/ci_invasion.csv"))
process data for invasion matrix
invade.mat <- invade.df %>%
group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
mutate(id_alt = paste0(V1, V2),
invade = case_when(
fitness > 0 ~ "invade",
fitness < 0 ~ "not invade"
)) %>%
group_by(id_alt) %>%
mutate(
mut_is_V1 = case_when(
mut_id == V1 ~ "V1_invade",
mut_id != V1 ~ "V1_invaded")) %>%
arrange(id_alt) %>%
select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>%
tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>%
group_by(id_alt) %>%
mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>%
distinct(id_alt, .keep_all = T) %>%
mutate(
category = case_when(
V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
)) %>%
select(V1, V2, invasion = category)
invade.df %>% filter(mut_id == "G-i_none")
invade.df %>% filter(res_id == "G-i_none")
invade.mat4 <- rbind(
select(invade.mat3, V1_label, V2_label, invasion),
select(invade.mat3, V2_label = V1_label, V1_label = V2_label) %>% mutate(invasion = NA)) %>%
mutate(
invasion_2 = case_when(
invasion == "Mutual invasion" ~ "Mutual invasion",
invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
)) %>%
filter(!is.na(V1_label))
Adding missing grouping variables: `id_alt`
Adding missing grouping variables: `id_alt`
plot invasion matrix

create summary bar chart
# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()
# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>%
summarize(frequency_1 = n())
invade.matalt2 <- invade.matalt %>%
mutate(invasion_alt = case_when(
invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
invasion == "Mutual invasion" ~ "Mutual invasion",
invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
)) %>%
group_by(V2_label, invasion_alt) %>%
summarize(frequency_2 = n())
# full join and sum. has confirmed all of them add up to 14
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))
invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>%
mutate(freq = frequency_1 + frequency_2) %>%
mutate(temp = case_when(
invasion == "Only strain 1 invasion" ~ freq
)) %>%
group_by(V1_label) %>%
mutate(invade_1_freq = max(temp, na.rm = T)) %>%
mutate(invasion_2 = case_when(
invasion == "Mutual invasion" ~ "Mutual invasion",
invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
))

plot together


ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/plos-bio/invasion_a.tiff"), units = "px", width = 2250, height = 1100, scale = 1.4, dpi=300, bg = "white")

#—————- invasion pairwise comparison—————–# ## proces data
combined
invade_nocomb <- invade.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "none")
invade_comb <- invade.df2 %>%
mutate(cue_1 = ifelse(
str_detect(label_ci_1, "sum"), "I+Ig",
trimws(gsub("+1.*|\\ log", "", label_ci_1))
),
log_1 = case_when(
str_detect(label_ci_1, "log") ~ "log",
str_detect(label_ci_1, "log", negate = T) ~ "none"),
comb_1 = case_when(
str_detect(label_ci_1, "1+|sum") ~ "comb",
str_detect(label_ci_1, "1+|sum", negate = T) ~ "none"
)) %>%
filter(comb_1 == "comb")
invade_comb.df <- left_join(
select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
select(invade_comb, cue_1, res_id, log_1, Total = fitness),
by = c("cue_1", "log_1", "res_id")) %>%
filter(!is.na(Total) & !is.na(Self)) %>%
mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
invade_comb.df
plot

ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/plos-bio/invasion_b.tiff"), units = "px", width = 2250, height = 850, scale = 1.2, dpi=300, bg = "white")

#===========================================# # Cue performance across single, co-infection, static, and invasion #===========================================#
plot

#-=====================# # Partitioning best cue #=====================-# #——- single infection ———–# # redo some optimization (lower fitness in no R than default)
source(here("functions/chabaudi_si_clean_R.R"))
source(here("functions/chabaudi_si_clean_N.R"))
# I none
cl <- makeCluster(detectCores()); setDefaultCluster(cl = cl)
I_no_R <- optimParallel(
par = rep(0.5,4), # start at 0.5x4
fn = chabaudi_si_clean_R,
control = list(trace = 6, fnscale = -1),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000),
cue = "I",
log_cue = "none",
solver = "vode")
stopCluster(cl)
# 0.144021 -43.1046 2030.27 -524.686
# 8.69589
import and process data
# import in data
si_partition.ls <- list.files(path = here("data/partition/si/"), pattern = "*.csv", full.names = T)
si_partition.ls <- lapply(si_partition.ls, read.csv)
si_partition.df <- do.call(rbind, si_partition.ls)
# combine with si fitness (default)
si_partition.df <- si_partition.df %>% left_join(select(si_fitness.df, id, fitness = value), by = "id")
# make longer
si_partition.df2 <- tidyr::pivot_longer(si_partition.df, c(fitness_R, fitness_N, fitness_W, fitness))
# calculate coefficient of variation. Also rename
si_partition.df2 <- si_partition.df2 %>%
group_by(name) %>%
mutate(cv = sd(value)/mean(value)*100,
mean = mean(value),
category = case_when(
name == "fitness_R" ~ "No RBC limitation",
name == "fitness_W" ~ "No targeted immunity",
name == "fitness_N" ~ "No indiscriminate\nimmunity",
name == "fitness" ~ "Default"
))
plot
library(ungeviz)
# raw fitness
si_partition.pl1 <- ggplot() +
geom_vpline(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = mean, group = category, color = category), show.legend = F, size = 1) +
geom_point(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value), size = 2, alpha = 0.7) +
geom_line(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value, group = id), alpha = 0.2) +
labs(x = "Fitness", y = "Conditions") +
theme_bw()
# coefficient of variation
si_partition.pl2 <- ggplot() +
geom_bar(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = cv), stat = "identity") +
labs(x = "Coefficient of\nvariation (%)", y = "") +
theme_bw() +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
si_partition.pl <- ggarrange(si_partition.pl1, si_partition.pl2, widths = c(1, 0.3), align = "h")
si_partition.pl
ggsave(here("figures/plos-bio/partition_fitness.tiff"), width = 7, height = 4)
#——- consequences of no targeted immunity ————# # get dynamics of no targeted immunity
get_dyn <- function(df){
source(here("functions/chabaudi_si_clean_W.R"))
id <- df$id
cue <- df$cue
log <- df$log
par <- c(df$var_W1, df$var_W2, df$var_W3, df$var_W4)
cue_range <- seq(df$low, df$high, by = df$by)
# get dynamics
dyn <- chabaudi_si_clean_W(
parameters_cr = par,
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 20, by = 1e-3),
cue_range = cue_range,
cue = cue,
log_cue = log,
solver = "vode",
dyn = T
)
# combine
dyn2 <- cbind(dyn, id = id, cue = cue, log = log)
write_parquet(dyn2, paste0(here("data/partition/si_dyn/"), id, "_noW_dyn.parquet"))
}
get df to run
# join with cue_range
cue_range_si.df <- read.csv(here("data/cue_range_si.csv"))
si_partition.df3 <- si_partition.df %>% left_join(select(cue_range_si.df, low, high, by, id), "id")
# lapply loop
si_partition.ls <- split(si_partition.df3, seq(nrow(si_partition.df3)))
mclapply(si_partition.ls, get_dyn)
process dataframe
plot conversion rate
mechanism: targeted immunity led to lower parasite density in the initial stages, which prevents parasites from making the switch from no conversion rate to high conversion rate. When parsite density undergoes drastic increase at the beginning due to lower immunity, this presents a higher degree of signal that allows parasite to make the switch appropriately
partition_cr.pl <- ggplot() +
geom_line(data = no_W.cr, aes(x = time, y= value, color = "No targeted immunity")) +
geom_line(data = si_dyn.cr, aes(x = time, y= value, color = "Default")) +
facet_wrap(~ez_label_si, ncol = 5) +
xlim(0, 20) +
geom_vline(xintercept = 7) +
labs(x = "Time (days)", y = "Conversion rate", color = "Condition") +
theme_bw()
no_W.cr
#—– cue state ————–#
function to get cue states
# function to get cue states
get_cue_state <- function(df){
cue <- trimws(gsub("_log|_none", "", unique(df$id)))
if(cue != "I+Ig"){
df2 <- df %>% filter(variable == cue)
if(str_detect(unique(df$id), "log")){
df2 <- df2 %>%
mutate(value = log10(value))
}
}
if(cue == "I+Ig"){
df2 <- df %>% filter(variable %in% c("I", "Ig")) %>%
group_by(time) %>%
mutate(value = sum(value))
if(str_detect(unique(df$id), "log")){
df2 <- df2 %>%
mutate(value = log10(value))
}
}
df2$value[df2$value == -Inf] <- 0
write_parquet(df2, paste0(here("data/partition/si_default_state/"), unique(df$id), "_noW_state.parquet"))
}
run function
# split dynamics based on id
no_W.split <- split(no_W.df, no_W.df$id)
# run function
mclapply(no_W.split, get_cue_state)
# get dataframe
no_W.state <- list.files(here("data/partition/si_state/"), pattern = "*.parquet", full.names = T)
no_W.state <- lapply(no_W.state, read_parquet)
no_W.state <- do.call(rbind, no_W.state)
no_W.state$value[no_W.state$value < 0] <- 0
# get same for si infection
default.split <- split(si_dyn.df, si_dyn.df$id)
mclapply(default.split, get_cue_state)
default.state <- list.files(here("data/partition/si_default_state/"), pattern = "*.parquet", full.names = T)
default.state <- lapply(default.state, read_parquet)
default.state <- do.call(rbind, default.state)
default.state$value[default.state$value < 0] <- 0
# manually correct non-logging
I_Ig.corr <- no_W.state %>% filter(id == "I+Ig_log") %>%
mutate(value = log10(value))
I_Ig.corr$value[I_Ig.corr$value < 0] <- 0
no_W.state2 <- no_W.state %>% filter(id != "I+Ig_log")
no_W.state2 <- no_W.state2 %>% rbind(no_W.state2, I_Ig.corr)
plot
absence of targeted immunity led to drastic increase in parasite density in early phases of infection. This produces high signal intensity for parasite and host-based cues, especially non-logged ones, which allows for state differentation. While this can be viewed as a modelling artifiact, it should be noted that the logged cues seldom changed as these changes in early infection did little to alter the actual early signal intensity sensed by the parasite. In an environment where there is heterogeneity in host response, and thus, signal, logging allows for parasites to adapt optimal strategy whereas non-logged cues must contend with sensitivity to immunity.
# function to individually plot stuff
plot_state <- function(df1, df2){
# plot state dynamics
state_pl <- ggplot() +
geom_line(data = df1, aes(x = time, y = value, color = name, group = name)) +
facet_wrap(~ez_label_si, scales = "free") +
xlim(1,20) +
theme_bw() +
theme(legend.position="none") +
labs(x = "", y = "Cue") +
scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59"))
# plot conversion rate dynamics
cr_pl <- ggplot() +
geom_raster(data = df2, aes(x = time, y = name, fill = value)) +
xlim(1,20) +
theme_bw() +
labs(x = "Time (days)") +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
legend.position = "none") +
scale_fill_viridis_c(lim = c(0, 1))
# arrange
ggarrange(state_pl, cr_pl, ncol = 1, nrow = 2, align = "v", heights = c(1, 0.4))
ggsave(paste0(here("figures/plos-bio/partition/"), unique(df1$id), ".tiff"), width = 4.5, height = 3.5)
}
split
# combine state
noW_default.state <- left_join(
select(no_W.state2, time, `No targeted\nimmunity` = value, id, ez_label_si),
select(default.state %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.state2 <- tidyr::pivot_longer(noW_default.state, c(`No targeted\nimmunity`, `Default`))
# combine conversion raster
noW_default.cr <- left_join(
select(no_W.cr, time, `No targeted\nimmunity` = value, id, ez_label_si),
select(si_dyn.cr %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.cr2 <- tidyr::pivot_longer(noW_default.cr, c(`No targeted\nimmunity`, `Default`))
# split
noW_default_state.ls <- split(noW_default.state2, noW_default.state2$id)
noW_default_cr.ls <- split(noW_default.cr2, noW_default.cr2$id)
# run function
mapply(plot_state, noW_default_state.ls, noW_default_cr.ls)
#——– reaction norms of default vs optimized ————# # get reaction norm and rug data
source(here("functions/par_to_df.R"))
# Gametocyte
g_log.rn <- par_to_df(par = c(1.211521, -3.936778, -1.312944, -1.285713), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g_log.rn2 <- par_to_df(par = c(1.393860539, -4.253007616, -0.313947029, -2.000857344), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g.rn <- par_to_df(par = c(0.04061288, -9.31445958, 74.13015506, -431.5984364), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
g.rn2 <- par_to_df(par = c(0.541729073, -3.904616443, 0.87487412, -0.694177021), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
# I+Ig
I_Ig_log.rn <- par_to_df(par = c(3.594042, 4.157744, -13.530672, 2.599905), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig_log.rn2 <- par_to_df(par = c(63.71893822, -87.77671601, -56.55475514, -66.02209549), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig.rn <- par_to_df(par = c(0.3159297, -46.1104558, 1250.752908, -6.1982093), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
I_Ig.rn2 <- par_to_df(par = c(0.731982784, -21.69799449, 149.7841876, 17.02551769), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
# convert log to non-logged scale
g_log.rn$cue_range <- 10^(g_log.rn$cue_range)
g_log.rn2$cue_range <- 10^(g_log.rn2$cue_range)
I_Ig_log.rn$cue_range <- 10^(I_Ig_log.rn$cue_range)
I_Ig_log.rn2$cue_range <- 10^(I_Ig_log.rn2$cue_range)
# get rug
g_log.rug <- default.state %>%
filter(label_si == "G log") %>%
mutate(value = 10^value) %>%
select(label_si, value)
g_log.rug2 <- no_W.state %>%
filter(label_si == "G log") %>%
mutate(value = 10^value) %>%
filter(value <= 6*(10^4)) %>%
select(label_si, value)
I_Ig_log.rug <- default.state %>%
filter(label_si == "I+Ig log") %>%
select(label_si, value)
I_Ig_log.rug2 <- no_W.state %>%
filter(label_si == "I+Ig log") %>%
select(label_si, value)
g.rug <- default.state %>%
filter(label_si == "G") %>%
select(label_si, value)
g.rug2 <- no_W.state %>%
filter(label_si == "G" & value <= 6*(10^4)) %>%
select(label_si, value)
I_Ig.rug <- default.state %>%
filter(label_si == "I+Ig") %>%
select(label_si, value)
I_Ig.rug2 <- no_W.state %>%
filter(label_si == "I+Ig") %>%
select(label_si, value)
# get rug limits
rug_lim <- rbind(g_log.rug,
g_log.rug2,
I_Ig_log.rug,
I_Ig_log.rug2,
g.rug,
g.rug2,
I_Ig.rug,
I_Ig.rug2) %>%
group_by(label_si) %>%
summarize(max = max(hablar::s(value), na.rm = T),
min = min(hablar::s(value), na.rm = T))
# combine and filter
rn <- rbind(
cbind(g_log.rn, label_si = "G log", condition = "Default"),
cbind(g_log.rn2, label_si = "G log", condition = "No targeted\nimmunity"),
cbind(g.rn, label_si = "G", condition = "Default"),
cbind(g.rn2, label_si = "G", condition = "No targeted\nimmunity"),
cbind(I_Ig_log.rn, label_si = "I+Ig log", condition = "Default"),
cbind(I_Ig_log.rn2, label_si = "I+Ig log", condition = "No targeted\nimmunity"),
cbind(I_Ig.rn, label_si = "I+Ig", condition = "Default"),
cbind(I_Ig.rn2, label_si = "I+Ig", condition = "No targeted\nimmunity")
) %>%
left_join(rug_lim, by = "label_si") %>%
group_by(label_si) %>%
filter(cue_range <= max & cue_range >= min)
# combine rug
rug <- rbind(cbind(g_log.rug, condition = "Default"),
cbind(g_log.rug2, condition = "No targeted\nimmunity"),
cbind(g.rug, condition = "Default"),
cbind(g.rug2, condition = "No targeted\nimmunity"),
cbind(I_Ig_log.rug, condition = "Default"),
cbind(I_Ig_log.rug2, condition = "No targeted\nimmunity"),
cbind(I_Ig.rug, condition = "Default"),
cbind(I_Ig.rug2, condition = "No targeted\nimmunity"))
# cobine with ezlabel
rn2 <- rn %>% left_join(ez_label, by = "label_si")
rug2 <- rug %>% left_join(ez_label, by = "label_si")
# filter rug
default.rug <- rug2 %>% filter(condition == "Default")
no.rug <- rug2 %>% filter(condition == "No targeted\nimmunity")
plot
ggplot() +
geom_line(data = rn2, aes(x = cue_range, y = cr, color = condition)) +
geom_rug(data = default.rug, aes(x = value, color = condition), sides = "b") +
geom_rug(data = no.rug, aes(x = value, color = condition), sides = "t") +
facet_wrap(~fct_relevel(ez_label_si, c("Gametocyte log10", "Gametocyte", "Asexual&sexual\niRBC log10", "Asexual&sexual iRBC")), scales = "free_x") +
scale_x_continuous(labels = function(x) format(x, scientific = TRUE)) +
theme_bw() +
scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59")) +
ylim(0, 1.1) +
labs(x = "Cue range", y = "Conversion rate", color = "Condition")
ggsave(here("figures/plos-bio/partition_rn.tiff"), width = 7.5, height = 6)
get conversion rate legend
noW_default.cr %>% filter(id == "G_log") %>%
ggplot() +
geom_raster(aes(x = time, y = id, fill = Default)) +
xlim(1,20) +
theme_bw() +
labs(x = "Time (days)",
fill = "Conversion rate") +
theme(axis.title.y=element_blank(),
axis.ticks.y=element_blank(),) +
scale_fill_viridis_c(lim = c(0, 1))
ggsave(here("figures/plos-bio/cr_legend.tiff"))
#================================# # Disease curves for single, co-infection, and invasion #===============================# # get data for disease curves
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet"))
# co-infection dynamics (mon-cue)
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
# dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))
#——- single cue comparison —————# # process data
#——— dual cue ————————–# # process data
# turn skinny
dual_dc.df <- dual_dyn.df %>%
mutate(label_alt = paste(label, "+" , label_b)) %>%
select(label_alt, time, variable, value) %>%
filter(variable == "I" | variable == "Ig" | variable == "R") %>%
distinct(label_alt, time, variable, .keep_all = T)
dual_dyn.df
dual_dc.df2 <- dual_dc.df %>%
tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, label_alt)) %>%
mutate(total = I+Ig)
#write_parquet(dual_dc.df2, here("data/disease_curve/dual_dc.parquet"))
# good dual cue -> list of good performing dual cues that encompass wide variety of cues
selected_dual_cue <- c("R log + I log", "R + Ig log", "G log + I log", "G log + Ig log", "Ig + I log")
bad_dual_cue <- c("G + I", "R + Ig", "R log + Ig", "G + R", "G + R log", "G + Ig", "Ig + I", "R + I", "R log + I")
# get classification -> R log10 + I log10 as the only good one
dual_dc.high <- dual_dc.df2 %>% filter(label_alt %in% selected_dual_cue) %>%
mutate(label_alt = gsub("log", "log10", label_alt))
dual_dc.poor <- dual_dc.df2 %>% filter(label_alt %in% bad_dual_cue) %>%
mutate(label_alt = gsub("log", "log10", label_alt))
#write_parquet(dual_dc.high, here("data/disease_curve/dual_dc_high.parquet"))
#write_parquet(dual_dc.poor, here("data/disease_curve/dual_dc_poor.parquet"))
#——- plotting single and dual cue disease map together——-# # plot

#———- co-infection monocue ————-# Note that we are plotting only single strain to show that on a single strain level, co-infection leads to larger area than single infection! # process data
# join high performing with label
ci_dc.high2 <- ci_dc.high %>% left_join(ez_label, by = c("label" = "label_ci"))
Error in is.data.frame(y) : object 'ez_label' not found
loop to get area: single infection
# split df
si_dc.ls <- split(si_dc.df, si_dc.df$id)
# get area
si_dc.area <- cbind.data.frame(area = as.numeric(lapply(si_dc.ls, cha)), id_alt = names(lapply(si_dc.ls, cha)))
# join with fitness
si_fitness.df2 <- si_fitness.df %>%
select(id, fitness = value) %>%
distinct(id, fitness) %>%
left_join(ez_label, by = c("id" = "id_si"))
si_dc.area <- si_dc.area %>%
left_join(si_fitness.df2, by = c("id_alt" = "id"))
dual cue
# split
dual.dc <- read_parquet(here("data/disease_curve/dual_dc.parquet"))
# get area
dual_dc.area <- cbind.data.frame(area = as.numeric(lapply(dual_dc.ls, cha)), id_alt = names(lapply(dual_dc.ls, cha)))
# get R log+I log area: 293632850541
dual_dc.area %>% filter(id_alt == "R log + I log")
coinfection
# split
ci_dc.ls <- split(ci_dc.df, ci_dc.df$label)
# run function to find area
ci_dc.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc.ls, cha)), id_alt = names(lapply(ci_dc.ls, cha)))
# join with fitness
ci_dc.area <- ci_dc.area %>%
left_join(select(ci_fitness.df, label, fitness = value), by = c("id_alt" = "label"))
ci_dc.area
#—— get fitted scatter plot for all single infection, co infection, and dual cue ——–#
"Area", y = "Fitness") +
Error: unexpected ',' in " "Area","
#——- plot together with disease curve ——–#
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "v", widths = c(1, 0.5))
# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "v", widths = c(1, 0.5))
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "v", widths = c(1, 0.5))
# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "v", widths = c(1, 0.5))
#——— static area comparison ————-# # compute area
# import in dc dynamic and fitness
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
static_fitness.df <- read.csv(here("data/ci_static.csv"))
# get winner and loser
static_dc.df4 <- static_dc.df %>%
left_join(select(static_fitness.df, id_1, id_2, fitness_difference), by = c("id_1", "id_2")) %>%
filter(id_1 != id_2) %>%
mutate(
total_winner = case_when(
fitness_difference > 0 ~ total1,
fitness_difference< 0 ~ total2
),
total_loser = case_when(
fitness_difference > 0 ~ total2,
fitness_difference< 0 ~ total1
))%>%
filter(abs(fitness_difference) > 0.1)
# split by winner and loser
static_dc.ls1 <- split(select(static_dc.df4, R, total = total_winner), static_dc.df4$id_alt)
static_dc.ls2 <- split(select(static_dc.df4, R, total = total_loser), static_dc.df4$id_alt)
# get area
static_win.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls1, cha)), status = "Winner")
static_loser.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls2, cha)), status = "Loser")
# pair
static.area <- cbind(select(static_win.area, Winner = area),
select(static_loser.area, Loser = area)) %>%
mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
static.area
plot static
static.area
static_area.pl <- ggpaired(static.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
labs(x = "Status", y = "Area", color = "Comparison\n(Static)") +
scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
stat_compare_means(paired = TRUE, hjust = 0) +
guides(color=guide_legend(nrow=2,byrow=TRUE))
#——— invasion area comparison —————–# # get area
# import in dc dynamic and fitness
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))
# calculate area
invasion_dc.df4 <- invasion_dc.df %>%
left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>%
mutate(
total_winner = case_when(
fitness> 0 ~ total1,
fitness< 0 ~ total2
),
total_loser = case_when(
fitness > 0 ~ total2,
fitness < 0 ~ total1
)) %>%
filter(abs(fitness) > 0.1)
invasion_dc.df4 %>% distinct(id_alt)
# split by winner and loser
invasion_dc.ls1 <- split(select(invasion_dc.df4, R, total = total_winner), invasion_dc.df4$id_alt)
invasion_dc.ls2 <- split(select(invasion_dc.df4, R, total = total_loser), invasion_dc.df4$id_alt)
# get area
invasion_win.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls1, cha)), status = "Winner")
invasion_loser.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls2, cha)), status = "Loser")
# pair
invasion.area <- cbind(select(invasion_win.area, Winner = area),
select(invasion_loser.area, Loser = area)) %>%
mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
invasion.area
plot
#—— plot together ————-#

#====================================# # dual cue dynamics figure #===================================#
get dual dynamics
dual.dyn <- chabaudi_si_clean(
parameters_cr = c(4.446192033, 10.97518275, 1.38762817, 23.3059254, -3.452052371, -18.0070692, 39.66614226, -3.545193141, 18.78350799),
immunity = "tsukushi",
parameters = parameters_tsukushi,
time_range = seq(0, 30, by = 1e-3),
cue_range = seq(6, 7, by = 1/500),
cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
cue = "R",
cue_b = "I",
log_cue = "log10",
log_cue_b = "log10",
solver = "vode",
dyn = T
)
# filter out relevent dataframes
dual.dyn_f <- dual.dyn %>%
filter(variable %in% c("I", "Ig", "G", "R", "N", "W"))
# cr only
dual.dyn_cr <- dual.dyn %>% filter(variable == "cr")
plot
dual_I.plt <- ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "I"), aes(x = time, y = value/(10^5)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "I" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^5, name="Asexual iRBC per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_Ig.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "Ig"), aes(x = time, y = value/(10^5)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "Ig" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^5, name="Sexual iRBC per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_G.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "G"), aes(x = time, y = value/(10^4)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "G" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^4)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^4, name="Gametocyte per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_R.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "R"), aes(x = time, y = value/(10^7)),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "R" & row_number() %% 1000 == 0),
aes(x = time, y = value/(10^7)), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*10^7, name="RBC per µL")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_N.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "N"), aes(x = time, y = value*10),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "N" & row_number() %% 1000 == 0),
aes(x = time, y = value*10), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*0.1, name="Indiscriminate immunity")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
dual_W.plt <-ggplot() +
geom_line(data = dual.dyn_f %>% filter(variable == "W"), aes(x = time, y = value*2),
color = "#4575b4") +
geom_point(data = dual.dyn_f %>% filter(variable == "W" & row_number() %% 1000 == 0),
aes(x = time, y = value*2), size = 2, color = "#4575b4") +
geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
scale_y_continuous(name = "Conversion rate",
sec.axis = sec_axis(~.*0.5, name="Targeted immunity")) +
labs(x = "Time (days)") +
xlim(0, 20) +
theme_bw() +
theme(legend.position = "none")
plot together
ggarrange(dual_I.plt, dual_Ig.plt, dual_G.plt, dual_R.plt, dual_N.plt, dual_W.plt, nrow = 3, ncol = 2, align = "hv", labels = c("A", "B", "C", "D", "E", "F"))
ggsave(here("figures/plos-bio/dual_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1, dpi=300, bg = "white")
#======================================# # Single and co-infection verification #======================================# # single infection
# import in all single infection data
si_val.ls <- list.files(path = here("data/si_validation"), pattern = "*.csv", full.names = T)
si_val.df <- lapply(si_val.ls, read.csv)
si_val.df <- do.call(rbind, si_val.df)
# get max fitness from simulation. left join with si_opt
si_opt.df <- read.csv(here("data/si_opt.csv"))
# we can see that all of the randomly simulated models have a fitness value that is less than the optimized model
si_val.df2 <- select(si_val.df, V1, id) %>%
left_join(si_opt.df, by =c("id" = "id")) %>%
mutate(fitness_difference = fitness_20 - V1) %>%
left_join(select(ez_label, id_si, ez_label_si), by = c("id" = "id_si"))
Model validation
# read in the results file
ci_val.ls <- list.files(path = here("data/ci_validation"), pattern = "*.csv", full.names = T)
ci_val.ls2 <- lapply(ci_val.ls, read.csv)
ci_val.df <- do.call(rbind, ci_val.ls2)
ci_val.df2
ci_val.df2 <- ci_val.df %>%
left_join(select(ez_label, label_ci, ez_label), by = c("label" = "label_ci"))
plot
si_val.plt <- ggplot(data = si_val.df2, aes(x = fitness_difference)) +
geom_histogram(bins = 50) +
geom_vline(xintercept = 0, color = "#fc8d59") +
facet_wrap(~ez_label_si, scales = "free", ncol = 3) +
labs(x = "Optimized fitness - random fitness", y = "Frequency") +
theme_bw()
ci_val.plt <- ggplot(data = ci_val.df2, aes(x = V1)) +
geom_histogram(bins = 50) +
geom_vline(xintercept = 0, color = "#fc8d59") +
facet_wrap(~ez_label, scales = "free", ncol = 4) +
labs(x = "Fitness difference between\noptimized and random strain", y = "Frequency") +
theme_bw()
ggarrange(si_val.plt, ci_val.plt, align = "hv", labels = c("A", "B"), widths = c(3,4))
ggsave(here("figures/plos-bio/validation.tiff"), units = "px", width = 2250, height = 1300, scale = 1.6, dpi=300, bg = "white")
#=========================# # Monte carlo dynamics supplementary #=========================# # run code in report 16

---
title: "R Notebook"
output: html_notebook
---
```{r}
library(dplyr)
library(ggplot2)
library(forcats)
library(here)
library(deSolve)
library(crone)
library(optimParallel)
library(doParallel)
library(doRNG)
library(arrow)
library(stringr)
library(parallel)
library(ggpubr)
library(scales)
```

Notebook for plotting all of the figures for PloS Biology manuscript submission

Guidelines: taken from https://journals.plos.org/plosbiology/s/figures#loc-figure-file-requirements
1. format: eps
2. max file size: 10 MB
3. text size: Arial, Times, or Symbol font only in 8-12 point
2. figure size: Width: 789 – 2250 pixels (at 300 dpi). Height maximum: 2625 pixels (at 300 dpi).


#=========================================#
figure 1: best single and co-infection cue
#=========================================#
Figure displaying the reaction norms of best single and co-infection.

#------- optimal cue reaction norm -----------#
# read data
```{r}
# single infection dynamics, reaction norms, and rugs
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 
si_rn.df <- read_parquet(here("data/si_dyn/si_rn.parquet"))
si_rug.df <- read_parquet(here("data/si_dyn/si_rug.parquet")) %>% 
  distinct(value, label, .keep_all = T)

# co-infection dynamics, reaction norms, and rugs
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))
ci_rn.df <- read_parquet(here("data/ci_dyn/ci_rn.parquet"))
ci_rug.df <- read_parquet(here("data/ci_dyn/ci_rug.parquet")) %>% 
  distinct(value, label, .keep_all = T)

ci_rug.df
```

# process data for reaction norm
```{r}
# import labelling scheme
ez_label <- read.csv(here("data/ez_label.csv"))

# get si_label with ci cue range
si_ci_rug.df <- ci_rug.df %>% 
  mutate(label_si = case_when(
    label %in% c("I", "I1+I2") ~ "I",
    label %in% c("I log","I1+I2 log") ~ "I log",
    label %in% c("Ig", "Ig1+Ig2") ~ "Ig",
    label %in% c("Ig log") ~ "Ig log",
    label %in% c("sum", "I+Ig") ~ "I+Ig",
    label %in% c("sum log", "I+Ig log") ~ "I+Ig log",
    label == "R" ~ "R",
    label == "R log" ~ "R log",
    label %in% c("G", "G1+G2") ~ "G",
    label == "G log" ~ "G log"
  )) 

# get limit for si_rug
si_rug_lim.df <- si_rug.df %>% 
  filter(time <= 20) %>%
  group_by(label)%>% 
  summarise(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  select(label_si = label, min_si = min, max_si = max)

# filter to restriction conversion rate reaction norm range to cue ranges that appear in rug
## change to Inf/-inf to NA. Note that I am first joining with si rug lim to check which limit is larger, We will go with the cue range that has the largest span
ci_rug_lim.df <- si_ci_rug.df %>% 
  group_by(label) %>% 
  mutate(min = min(value, na.rm = T)*0.9,
         max = max(value, na.rm = T)*1.1) %>% 
  distinct(label, .keep_all = T) %>% 
  select(label, label_si, min, max)

rug_lim.final <- ci_rug_lim.df %>% left_join(si_rug_lim.df, by = "label_si") %>% 
  mutate(final_min = min(min, min_si),
         final_max = max(max, max_si))

# get second rug_lim.final for single infection
rug_lim.final2 <- rug_lim.final %>% 
  group_by(label_si) %>% 
  mutate(final_min = min(final_min, na.rm = T),
         final_max = max(final_max, na.rm = T))

# filter ci_rn by limit
ci_rn.df2 <- ci_rn.df %>% 
  left_join(rug_lim.final, by  = "label") %>% 
  group_by(label) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  arrange(cue_range, .by_group = T) %>% 
  filter(row_number() %% 10 == 0) 
```

# match single infection rn with coinfection 
```{r}
# get ci label to si rug and filter by limit
si_rn.df2 <- left_join(si_rn.df, select(ez_label, label_si, label_ci), by = c("label" = "label_si")) %>% 
  left_join(rug_lim.final, by  = c("label_ci" = "label")) %>% 
  group_by(label_ci) %>% 
  filter(cue_range <= final_max & cue_range >= final_min) %>% 
  arrange(cue_range, .by_group = T) %>% 
  filter(row_number() %% 10 == 0) %>% 
  select(cue_range, cr, label_ci, label_si)

# get ci label to si rug, we will keep one unique value per label
si_rug.df2 <- select(si_rug.df, value, label_si = label) %>% distinct(value, label_si)
```

# plot reaction norm
```{r}
# join with ezlabel
ci_rn.df3 <- ci_rn.df2 %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rn.df3 <- si_rn.df2 %>% left_join(ez_label, by = "label_ci")
ci_rug.df3 <- ci_rug.df %>% left_join(ez_label, by = c("label" = "label_ci"))
si_rug.df3 <- si_rug.df2 %>% left_join(ez_label, by = "label_si")
```


```{r}
# redo order of cues
ci_rn.df3$ez_label <- factor(ci_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10")) 

si_rn.df3$ez_label <- factor(si_rn.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

ci_rug.df3$ez_label <- factor(ci_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))

si_rug.df3$ez_label <- factor(si_rug.df3$ez_label, 
                              levels = c("Asexual iRBC", "Asexual iRBC log10",
                                         "Total asexual iRBC", "Total asexual\niRBC log10",
                                         "Sexual iRBC", "Sexual iRBC log10",
                                         "Asexual&sexual iRBC", "Asexual&sexual\niRBC log10",
                                         "Total iRBC", "Total iRBC log10",
                                         "Gametocyte", "Gametocyte log10",
                                         "Total gametocyte", "Total sexual iRBC",
                                         "RBC", "RBC log10"))
# plot
ggplot() +
  geom_line(data = ci_rn.df3, aes(x = cue_range, y = cr, color = "Co-infection")) +
  geom_point(data = ci_rn.df3 %>% 
    group_by(label) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Co-infection", shape = "Co-infection"), size = 2) +
  geom_line(data = si_rn.df3, aes(x = cue_range, y = cr, color = "Single infection")) +
  geom_point(data = si_rn.df3 %>% 
    group_by(label_ci) %>% 
    mutate(ten_th = round(n()/10)) %>% 
    filter(row_number() %% ten_th == 0), aes(x = cue_range, y = cr, color = "Single infection", shape = "Single infection"), size = 2) +
  geom_rug(data = ci_rug.df3, aes(x = value), color = "#4575b4", sides = "t", length = unit(0.1, "npc")) +
  geom_rug(data = si_rug.df3, aes(x = value), color = "#fc8d59", sides = "b", length = unit(0.1, "npc")) +
  facet_wrap(~ez_label, scales = "free_x", ncol = 2) +
  ylim(-0.3, 1.3) +
  theme_bw() +
  labs(y = "Conversion rate", x = "Cue range", color = "Model", shape = "Model") +
  scale_x_continuous(labels = function(x) format(x, scientific = T),
                     guide = guide_axis(check.overlap = TRUE)) + 
                    theme(axis.text.x = element_text(size = 7),
                          legend.position = "right")  +
  scale_color_manual(values=c( "#4575b4", "#fc8d59")) +
  theme(strip.text.x = element_text(margin = margin(b = 0.5, t = 0.5)))

ggsave(units = "px", dpi = 300, width = 2000, height = 2500, filename = here("figures/plos-bio/reaction_norm.tiff"), bg = "white", scale = 1.1)
```

#=========================================#
Plotting single and co-infection fitness scatter plot
#=========================================#
# import in data
```{r}
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

ez_label <- read.csv(here("data/ez_label.csv"))
```

# process for final 20 days fitness
```{r}
# get single infection maximum tau_cum for 20 days
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# get co-infection maximum tau_cum for 20 days
ci_fitness.df <- ci_dyn.df %>% 
  filter(variable == "tau_cum1" & time == 20)

# join together two dataframes and add labels
si_ci_fitness.df <- select(ci_fitness.df, fitness_ci = value, label_ci = label) %>% 
  left_join(ez_label, by = "label_ci") %>% 
  left_join(select(si_fitness.df, fitness_si = value, id_si = id), by = "id_si") %>% 
  select(ez_label_si, ez_label, fitness_si, fitness_ci) %>% 
  rbind(data.frame( # add time
    ez_label_si = "Time", 
    ez_label = "Time",
    fitness_si =  9.787899,
    fitness_ci  = 2.311841
  ))

si_ci_fitness.df
```

# plot scatter point of single infection vs co-infection
```{r}
si_ci_fitness.pl <- ggplot() +
  geom_point(data = si_ci_fitness.df, aes(x = fitness_si, y = fitness_ci, color = ez_label_si, shape = ez_label_si), size = 3.5) +
  ggrepel::geom_label_repel(data = si_ci_fitness.df, aes(label = ez_label, x = fitness_si, y = fitness_ci),
                            fill = "white",xlim = c(-Inf, Inf), ylim = c(NA, NA)) +
  labs(x = "Maximum single infection fitness", y = "Maximum Co-infection fitness (per strain)",
       color = "Single infection cue", shape = "Single infection cue") +
  scale_shape_manual(values = 15:25) +
  lims(x = c(8, 10.3)) +
  theme_bw()
```
#=========================================================#
# time series conversion rate for single and co-infection
#=========================================================#
#---------get "ideal" single infection and co-infection dynamics---------#
This is when stuff are optimized based on time
```{r}
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/chabaudi_ci_clean.R"))

# single infection dynamic with time as cue. Optimized using local optimizer. Note that the time variable in dual cue is slightly different with higher flexibility. While that increases the fitness value by ~0.1, the overall conversion rate dynamic does not change that much
si_t.df <- chabaudi_si_clean(
  parameters_cr = c(4.55386, -13.0056, 4.15466, -11.9424),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, 20, by = 1e-3),
  cue = "t",
  solver = "vode",
  dyn = T)

# co-infection dynamic with time as cue
ci_t.df <- chabaudi_ci_clean(parameters_cr_1 = c(26.16425,	-71.07799,	53.34121,	-166.25693),
                            parameters_cr_2 = c(26.16425,	-71.07799,	53.34121,	-166.25693),
                            immunity = "tsukushi",
                            parameters = parameters_tsukushi,
                            time_range = time_range, 
                            cue_1 =  "t",
                            cue_2= "t",
                            cue_range_1 = time_range, # cue range of strain 1
                            cue_range_2 = time_range, # cue range of strain 2
                            log_cue_1 = "none", # whether to log transform cue 1
                            log_cue_2 = "none", # whether to log transform cue 2
                            solver = "vode", # solver for numerical integration. Vode often gives faster runs
                              dyn = T)

# get only conversion rate and make same format as si_cr.df2/ci_cr.df2
si_t.df %>% filter(time == 20 & variable == "tau_cum") ## fitness value of time as cue in single infection
si_t.cr <- si_t.df %>% 
  filter(variable == "cr") %>% 
  mutate(ez_label_si = "Time", 
         fitness_si =9.787899) %>% 
  select(time, value, fitness_si, ez_label_si)


## get fitness value of co-infection time
ci_t.df %>% filter(variable == "tau_cum1") %>% summarize(max = max(value, na.rm = T))
ci_t.cr <- ci_t.df %>% 
  filter(variable == "cr_1") %>% 
  mutate(ez_label = "Time", 
         fitness_ci = 2.311841,
         label = "Time") %>%
  select(time, value, fitness_ci, ez_label, label)
```

#--------- single infection conversion rate heat map--------------#
# process info for single infection
```{r}
# get conversion rate dynamics
si_cr.df <- si_dyn.df %>% 
  filter(time <= 20 & variable == "cr")

# get time ranges. We will omit any values where I is below 100. Note for all single infection, it is form  0.55 to 20. Note we are not doing I+Ig because conversion rate is only relevent when asexual iRBC is around to differentiate!
si_dyn.df %>% 
  filter(time <= 20 & variable == "I" & value > 100) %>%
  group_by(id) %>% 
  summarize(min_time = min(time),
            max_time = max(time))

# join fitness and ideal dynamics
si_cr.df2 <- si_cr.df %>% 
  left_join(select(si_fitness.df, id, fitness_si = value), by = "id") %>% 
  left_join(ez_label, by = c("id" = "id_si")) %>% 
  select(time, value, fitness_si, ez_label_si) %>% 
  rbind(si_t.cr) # join with dataframe containing ideal dynamics

# plot. note that we are omitting day 1 conversion rate dynamic because all cr at that stage is 0 (by model default)
si_cr.pl <- ggplot(data = si_cr.df2, aes(x = time, y = forcats::fct_reorder(ez_label_si, fitness_si))) +
  geom_raster(aes(fill = value)) +
  labs(x = "Time (days)", y = "Single infection cues", fill = "Conversion rate") +
  viridis::scale_fill_viridis(limits = c(0, 1), oob = scales::squish) +
  xlim(1, 20) +
  theme_bw()
```

# -----------------co-infection conversion rate heatmap-----------#
# plot co-infeciton convesion rate heatmap
```{r}
# get conversion rate dynamics
ci_cr.df <- ci_dyn.df %>% 
  filter(time <= 20 & variable == "cr_1")

# get time range to include. Same criteria. we need if single strain population declines to less than 100
## when time is used as a cue: 12.943
ci_t.df %>%
  filter(time <= 20 & variable == "I1" & value > 100) %>% 
  summarize(max_time = max(time))
## all other dynamics
ci_cr.time <- ci_dyn.df %>% 
  filter(time <= 20 & variable == "I1" & value > 100) %>% 
  group_by(label) %>% 
  summarize(max_time = max(time)) %>% 
  rbind(data.frame(label = "Time", max_time = 12.943))

# join with fitness, ideal dynamics and also maximum time range considered
ci_cr.df2 <- ci_cr.df %>% 
  left_join(select(ci_fitness.df, label, fitness_ci = value), by = "label") %>% 
  left_join(ez_label, by = c("label" = "label_ci")) %>% 
  select(time, value, fitness_ci, ez_label, label) %>% 
  rbind(ci_t.cr) %>% # bind ideal dynamics
  left_join(ci_cr.time, by = "label") %>% 
  filter(time <= max_time) # keep only those that are within time range

# plot
ci_cr.pl <- ggplot() +
  geom_raster(data = ci_cr.df2, 
              aes(x = time, y = forcats::fct_reorder(ez_label, fitness_ci), fill = value)) +
  labs(x = "Time (days)", y = "Co-infection cues", fill = "Conversion rate") +
  viridis::scale_fill_viridis(limits = c(0, 1), oob = scales::squish) +
  xlim(1, 20) +
  theme_bw()
```

#--------- assemble final figure --------------#
```{r}
# combine conversion rate dynamic of single infection and co-infection
cr.pl <- ggarrange(si_cr.pl, ci_cr.pl, labels = c("B", "C"), ncol = 2, common.legend = T, align = "h")

# combine with fitness scatterplot
ggarrange(si_ci_fitness.pl, cr.pl, ncol = 1, labels = c("A", ""), align = "hv")

# save
ggsave(units = "px", dpi = 300, width = 2000, height = 2000, filename = here("figures/plos-bio/fitness_cr-dyn.tiff"), bg = "white", scale = 1.35)
```

#===========================================================#
# Demographic stochasticity
#===========================================================#
#---------- plot heat map---------------#
# import in all fitness files
```{r}
file_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv", full.names = T)
name_ls <- list.files(path = here("data/MC_partitioned/"), pattern = "*.csv")
name_ls <- gsub("*.csv", "", name_ls)

# 60, which is about right
length(file_ls)

# read in files
fitness.ls <- lapply(file_ls, read.csv)

# assign unique ID
fitness.ls <- mapply(cbind, fitness.ls, "ID" = name_ls, SIMPLIFY = F)
```

# process data
```{r}
# get metainfo from ID
fitness.ls2 <- mclapply(fitness.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- unlist(str_split(unique(id_col), pattern = "_"))[[4]]
  rand_var <- unlist(str_split(unique(id_col), pattern = "_"))[[5]]
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = rand_var, mean_fitness = mean_fitness, sd_fitness = sd_fitness)
  return(res)
})
```

# Get reference data
```{r}
reference_ls <- list.files(path = here("data/MC2"), pattern = "*.csv", full.names = T)
reference_name.ls <- gsub("*.csv", "", list.files(path = here("data/MC2/"), pattern = "*.csv"))

# read in the files
reference.ls <- lapply(reference_ls, read.csv)

# assign unique ID
reference.ls <- mapply(cbind, reference.ls, "ID" = reference_name.ls, SIMPLIFY = F)

# get meta data
reference.ls2 <- mclapply(reference.ls, function(x){
  id_col <- x$ID
  # string split to extract all info
  cue <- unlist(str_split(unique(id_col), pattern = "_"))[[2]]
  # get log
  third_col <- unlist(str_split(unique(id_col), pattern = "_"))[[3]]
  log <- ifelse(third_col == "log", "log10", "none")
  
  # get mean
  mean_fitness <- mean(x$max_fitness)
  
  # get sd
  sd_fitness <- sd(x$max_fitness)
  
  # bind results
  res <- cbind(x, cue= cue, log = log, rand_var = "all", ref_mean_fitness = mean_fitness, ref_sd_fitness = sd_fitness)
  return(res)
})
```

# combine MC partitioned and reference df
```{r}
# get unique column values for each cue, log, and rand_var combo
fitness.ls3 <- do.call(rbind, fitness.ls2)
fitness.ls3 <- fitness.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# repeat with reference
reference.ls3 <- do.call(rbind, reference.ls2)
reference.ls3 <- reference.ls3 %>% dplyr::distinct(ID, .keep_all = T)

# combine!
ref_fit.df <- left_join(fitness.ls3, reference.ls3, by = c("cue" = "cue", "log"= "log"))
```

# compute proportion fitness and variation
```{r}
ref_fit.df2 <- ref_fit.df %>% 
  mutate(p_sd = sd_fitness/ref_sd_fitness,
         p_mean = ref_mean_fitness/mean_fitness,
         cue_log = paste0(cue, "_", log),
         label = case_when(
           cue == "G" ~ "Gametocyte",
           cue == "I" ~ "Asexual iRBC",
           cue == "I+Ig" ~ "Asexual&sexual\niRBC",
           cue == "Ig" ~ "Sexual iRBC",
           cue == "R" ~ "RBC"
           ),
         parameter = case_when(
           rand_var.x == "rho" ~ "RBC replenishment (ρ)",
           rand_var.x == "phin" ~ "Half-life indis (ϕn)",
           rand_var.x == "phiw"~ "Half-life targeted (ϕw)",
           rand_var.x == "psin" ~ "Activation indis (ψn)",
           rand_var.x == "psiw" ~ "Activation targeted (ψw)",
           rand_var.x == "beta" ~ "Burst size (β)"
         ))
```

# plot!
```{r}
# variation
mc_b <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_sd)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(sd("1 parameter randomized"), sd("all parameters randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

# mean fitness
mc_c <- ggplot() +
  geom_tile(data = ref_fit.df2 , aes(x = label, y = parameter, fill = p_mean)) +
  facet_wrap(~log) +
  theme_bw() +
  viridis::scale_fill_viridis() +
  labs(x = "Cue", y = "Parameter randomized", fill = expression(frac(Mean("all parameters randomized"), Mean("1 parameter randomized")))) +
  theme(legend.position="top",
        axis.text.x = element_text(angle = 45, hjust=1))

mc_partition <- ggarrange(mc_b, mc_c, ncol = 1)

```

#-------------- get violine plot of variation in fitness --------------------#
# read MC data
```{r}
# read in dymamics
mc_G_log.dyn <- read_parquet(here("data/MC2/mc_G_log_dyn.parquet"))
mc_G.dyn <- read_parquet(here("data/MC2/mc_G_dyn.parquet"))
mc_R_log.dyn <- read_parquet(here("data/MC2/mc_R_log_dyn.parquet"))
mc_R.dyn <- read_parquet(here("data/MC2/mc_R_dyn.parquet"))
mc_I_log.dyn <- read_parquet(here("data/MC2/mc_I_log_dyn.parquet"))
mc_I.dyn <- read_parquet(here("data/MC2/mc_I_dyn.parquet"))
mc_Ig_log.dyn <- read_parquet(here("data/MC2/mc_Ig_log_dyn.parquet"))
mc_Ig.dyn <- read_parquet(here("data/MC2/mc_Ig_dyn.parquet"))
mc_I_Ig_log.dyn <- read_parquet(here("data/MC2/mc_I+Ig_log_dyn.parquet"))
mc_I_Ig.dyn <- read_parquet(here("data/MC2/mc_I+Ig_dyn.parquet"))

# read in fitness
mc_G_log.fitness <- read.csv(here("data/MC2/mc_G_log_fitness.csv"))
mc_G.fitness <- read.csv(here("data/MC2/mc_G_fitness.csv"))
mc_R_log.fitness <- read.csv(here("data/MC2/mc_R_log_fitness.csv"))
mc_R.fitness <- read.csv(here("data/MC2/mc_R_fitness.csv"))
mc_I_log.fitness <- read.csv(here("data/MC2/mc_I_log_fitness.csv"))
mc_I.fitness <- read.csv(here("data/MC2/mc_I_fitness.csv"))
mc_Ig_log.fitness <- read.csv(here("data/MC2/mc_Ig_log_fitness.csv"))
mc_Ig.fitness <- read.csv(here("data/MC2/mc_Ig_fitness.csv"))
mc_I_Ig_log.fitness <- read.csv(here("data/MC2/mc_I+Ig_log_fitness.csv"))
mc_I_Ig.fitness <- read.csv(here("data/MC2/mc_I+Ig_fitness.csv"))
```

# examine variation
```{r}
# plot fitness vs iteration
fitness.df <- rbind(
  cbind(mc_G_log.fitness, id = "Gametocyte log10"),
  cbind(mc_G.fitness, id = "Gametocyte"),
  cbind(mc_R_log.fitness, id = "RBC log10"),
  cbind(mc_R.fitness, id = "RBC"),
  cbind(mc_I_log.fitness, id = "Asexual iRBC log10"),
  cbind(mc_I.fitness, id = "Asexual iRBC"),
  cbind(mc_Ig_log.fitness, id = "Sexual iRBC log10"),
  cbind(mc_Ig.fitness, id = "Sexual iRBC"),
  cbind(mc_I_Ig_log.fitness, id = "Asexual & sexual iRBC log10"),
  cbind(mc_I_Ig.fitness, id = "Asexual & sexual iRBC")
)

# quantify variance and mean
fitness_var.df <- fitness.df %>% 
  dplyr::group_by(id) %>% 
  dplyr::summarise(median = median(max_fitness),
                   mean = mean(max_fitness)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median))
```

# plot violin with difference in deterministic model fitness and mean model fitness
```{r}
# get deterministic df
det.df <- data.frame(fitness_var.df, `Maximum fitness` =  c(8.49777, 9.494991,8.854682,9.573291,8.58856,9.561373,8.23991,8.181604,8.569285,9.618812)) %>% 
  dplyr::mutate(id = forcats::fct_reorder(id, median)) %>% 
  tidyr::pivot_longer(-id) %>% 
  mutate(classification = case_when(
    name == "Maximum.fitness" ~"Optimal single infection",
    name == "median" ~"Median Monte Carlo",
    name == "mean" ~ "Mean Monte Carlo"))

mc_a <- ggplot() +
  geom_point(data = det.df, aes(y = id, x = value, shape = classification, color = classification), size = 3, alpha = 0.8) +
  geom_violin(data = fitness.df, aes(y = id, x = max_fitness), fill = "transparent") +
  labs(x = "Fitness", y = "Cue", color = "Fitness", shape = "Fitness") +
  theme_bw() +
  theme(legend.position = "bottom") +
  scale_color_manual(values=c("black", "#4575b4", "#fc8d59"))
```

#-------------- plot together-----------------------#
```{r}
# arrange the heat map
ggarrange(mc_a, mc_partition, ncol = 2, nrow = 1, labels = c("A", "B"), align = "h")

# save
ggsave(units = "px", dpi = 300, width = 2100, height = 1400, filename = here("figures/plos-bio/MC.tiff"), bg = "white", scale = 1.5)
```


#--------------------------------------------#
# dual cue optimization figure
#--------------------------------------------#
```{r}
source(here("functions/chabaudi_si_clean_high.R"))
source(here("functions/chabaudi_si_clean.R"))
source(here("functions/par_to_hm_te.R"))
```

#---------- plotting fitness of dual vs single cue opt ---------#
# import in previous data
```{r}
# dual cue fitness
dual_fitness.df <- read.csv(here("data/dual_cue_opt4/dual_cue_fitness_20.csv"))
## make label and filter out very low fitness
dual_fitness.df <- dual_fitness.df %>% 
  mutate(temp_label = gsub("log", "log10", label),
         temp_label_b = gsub("log", "log10", label_b),
         label_final = paste0(temp_label, "+", temp_label_b)) %>% 
  filter(value > 2)

# get single cue fitness
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 
si_fitness.df <- si_dyn.df %>% 
  filter(variable == "tau_cum" & time == 20)

# join si and dual cue
dual_si_fitness.df <- dual_fitness.df %>% 
  left_join(select(si_fitness.df, id, si_fitness = value), by = "id") %>% 
  left_join(select(si_fitness.df, id_b = id, si_fitness_b = value), by = "id_b") %>% 
  mutate(si_fitness_max = ifelse(si_fitness > si_fitness_b, si_fitness, si_fitness_b),
         dual_label = gsub("log", "log10", paste(label, "+", label_b)))
```

# plot
```{r}
dual_si_fitness.pl <- ggplot() +
  geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = value, color = "Dual cue", shape = "Dual cue"), size = 3, alpha = 0.7) +
  geom_point(data = dual_si_fitness.df, aes(y = forcats::fct_reorder(dual_label, value), x = si_fitness_max, color= "Best single cue", shape= "Best single cue"), size = 3, alpha = 0.7) +
  labs(x = "Fitness", y = "Dual cue", color = "Cue", shape = "Cue") +
  scale_color_manual(values=c("#fc8d59", "#4575b4")) +
  geom_vline(xintercept = 9.883602) +
  geom_text(aes(x=9.883602, label="\nTime-based fitness (df=9)", y = "G + R log10"), angle=90) +
  xlim(8.35, 10) +
  theme_bw() +
  theme(legend.position = "top")
```


#----------- time series conversion rate -------------#
# dynamics simulation of high parameter cues (these serve as reference points)
```{r}
# best dual cue combo
dual.cr <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# when time is used as a cue (high parameter)
time.cr <- chabaudi_si_clean_high(
  parameters_cr = c(9.154314,  -7.570829, -22.506638 ,  3.382405 ,-13.453519 ,-17.011485  , 3.678181, -12.851895 ,-26.115158),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, 20, by = 1e-3),
  cue = "t",
  solver = "vode",
  dyn = T)

# when asexual iRBC is used as a cue (high flexibility)
I_high.cr <- chabaudi_si_clean_high(
  parameters_cr = c(1.296675,  3.544034 , 4.907484,  2.174249, -3.238309 ,-5.181614 ,-1.645072 , 1.834302 , 1.581011),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000),
  cue = "I",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# when asexual iRBC is used as a cue (normal flexibility)
I.cr <- chabaudi_si_clean(
  parameters_cr = c(5.463558,	2.383948,	-17.757281,	4.571835),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000),
  cue = "I",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# when RBC is used as cue (high flexibility)
R_high.cr <- chabaudi_si_clean_high(
  parameters_cr = c(5.0340348 ,  0.5846168 ,  0.3749648 ,  0.6842673  , 2.4748107 , 10.9036034 , 16.8246316, -24.8690971 ,1.8007238),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 20, by = 1e-3),
  cue_range =  seq(log10(10^6), log10(10^7), by = (log10(10^7)-log10(10^6))/5000),
  cue = "R",
  log_cue = "log10",
  solver = "vode",
  dyn = T)

# process 
I_high.cr2 <- I_high.cr %>% filter(variable == "cr") %>% mutate(label_new = "I log10 (df=9)") %>% select(-variable)

R_high.cr2 <- R_high.cr %>% filter(variable == "cr") %>% mutate(label_new = "R log10 (df=9)") %>% select(-variable)

time_high.cr2 <- time.cr %>% filter(variable == "cr") %>% mutate(label_new = "Time (df=9)") %>% select(-variable)

dual.cr2 <- dual.cr %>% filter(variable == "cr") %>% mutate(label_new = "R log10+I log10\n(df=9)") %>% select(-variable)

# combine
dual_cr.df <- rbind(I_high.cr2, R_high.cr2, time_high.cr2, dual.cr2)
```

# plot
```{r}
dual_cr.pl <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 3) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#fc8d59","#fdcb44","black", "#4575b4")) +
  theme_bw() +
  theme(legend.position="right",
        plot.margin = margin(t = 40, r = 0, b = 0, l = 0, unit = "pt")) +
  guides(color = guide_legend(nrow = 4, byrow = TRUE))
```

#------------ reaction norm heatmap of R log10 + I log10 ------------#
# process data
```{r}
# make heatmap df
R_I.hm <- par_to_hm_te(par = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
             cue_range = seq(6,	7, length.out = 500),
             cue_range_b = seq(0,	6.77815125, length.out = 500))

# process dynamics
R_I.dyn <- dual.cr %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(log_R = log10(R),
         log_I = log10(I))

# examine sexual iRBC as well
R_Ig.dyn <- dual.cr %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(log_R = log10(R),
         log_Ig = log10(Ig))
max(R_Ig.dyn$Ig) 
```

# plot
```{r}
dual_rn.pl <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion\nrate") +
  theme_dark() +
  theme(legend.position = "right")

# just testing for sexual iRBC vs RBC
ggplot() +
  geom_path(data = R_Ig.dyn, aes(x = Ig, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_Ig.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = Ig, y = log_R), color = "white") +
  scale_x_continuous(trans = "log10") +
  xlim(0, 200000) +
  labs(y = "RBC log10", x = "Sexual iRBC log10", fill = "Conversion rate") +
  theme_dark()
```

# save figure for poster
```{r}
dual_rn.pl2 <- ggplot() +
  geom_raster(data = R_I.hm, aes(x = cue_range_b, y = cue_range, fill = cr)) +
  scale_fill_viridis_c() +
  geom_path(data = R_I.dyn, aes(x = log_I, y = log_R), color = "white", arrow = arrow(angle = 30, length = unit(0.1, "inches"))) +
  geom_point(data = R_I.dyn %>% filter(row_number() %% 1000 == 1 & time <= 20), aes(x = log_I, y = log_R), color = "white") +
  xlim(0.99*min(hablar::s(R_I.dyn$log_I), na.rm = T), 1.01* max(hablar::s(R_I.dyn$log_I), na.rm = T)) +
  ylim(0.99*min(hablar::s(R_I.dyn$log_R), na.rm = T),1.01* max(hablar::s(R_I.dyn$log_R), na.rm = T)) +
  labs(y = "RBC log10", x = "Asexual iRBC log10", fill = "Conversion rate") +
  theme_dark() + theme(legend.position="top") 

dual_cr.pl2 <- ggplot() +
  geom_line(data = dual_cr.df, aes(color = label_new, x = time, y = value), size = 1) +
  geom_point(data = dual_cr.df %>% filter(time%%1 == 0), aes(color = label_new, x = time, y = value, shape = label_new), size = 2) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Cue", shape = "Cue") +
  xlim(0, 20) +
  scale_color_manual(values = c("#4575b4", "#91bfdb","#fc8d59","#fdcb44")) +
  theme_bw() +
  theme(legend.position="top") +
  guides(color = guide_legend(nrow = 2, byrow = TRUE))

ggarrange(dual_cr.pl2, dual_rn.pl2, align = "h", widths = c(1.25, 1))
ggsave(here("poster/dual_cue.png"), width = 7, height = 4)
```


#-------- assemble final figure -------------#
```{r}
# assemble panel B and C
dual_pl.BC <- ggarrange(dual_cr.pl, dual_rn.pl, align = "v", ncol = 1, labels = c("B", "C"))

# assemble panel A
ggarrange(dual_si_fitness.pl, dual_pl.BC, ncol = 2, labels = c("A", ""), widths = c(1,1.1))
ggsave(here("figures/plos-bio/dual_cue.tiff"), units = "px", width = 2100, height = 1400, scale = 1.3, dpi=300,  bg = "white")
```


#============================================#
# dynamics of dual cue simulation (all combinations)
#============================================#
# conversion rate
```{r}
# import in dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))

# keep only cr and make labels for plotting. we are going to omit any cue combinations containing I+Ig due to those not optimizing at all
dual_cr.df_p <- dual_dyn.df %>% 
  filter(variable == "cr" & cue != "I+Ig" & cue_b != "I+Ig") %>% 
  mutate(label_plot = paste(label, "+", label_b)) %>% 
  group_by(label_plot) %>% 
  arrange(time, .by_group = T) %>% 
  filter(row_number() %% 10 == 1) %>%  # cut down on number of rows
  left_join(select(dual_fitness.df, id, id_b, fitness = value), by = c("id", "id_b")) # get fitness

# plot raster
ggplot(data = dual_cr.df_p, aes(x = time, y = fct_reorder(label_plot, fitness), fill = value)) +
  geom_raster() +
  scale_fill_viridis_c() +
  xlim(1, 20) +
  labs(x = "Time (days)", y = "Dual cue combination", fill = "Conversion rate") +
  theme_bw()

ggsave(here("figures/plos-bio/dual_cue_cr.tiff"), units = "px", width = 2000, height = 1500, dpi=300,  bg = "white")
```

# cumultative transmission transmission potential
Proving a point to myself
```{r}
dual_tau.df_p <- dual_dyn.df %>% 
  filter(variable == "tau_cum" & cue != "I+Ig" & cue_b != "I+Ig") %>% 
  mutate(label_plot = paste(label, "+", label_b)) %>% 
  group_by(label_plot) %>% 
  arrange(time, .by_group = T) %>% 
  filter(row_number() %% 10 == 1) %>%  # cut down on number of rows
  left_join(select(dual_fitness.df, id, id_b, fitness = value), by = c("id", "id_b")) %>%  # get fitness 
  filter(fitness > 9.65)

# it seems that R log+I log only inches ahead of other at the very end of the infection, suggesting that terminal investment is at play
ggplot(data = dual_tau.df_p, aes(x = time, y = value, color = label_plot)) +
  geom_line() +
  scale_fill_viridis_c() +
  xlim(15, 20) +
  ylim(5,10) +
  labs(x = "Time (days)", y = "Dual cue combination", fill = "Conversion rate") +
  theme_bw()
```




#============================================#
# get dual cue disease map (simulated)
#============================================#
I am going to take the optimal dynamic (R log10 + I log10) and plot all possible disease curves and see if any follow the hysteresis curve. See below for real life disease curve.

# process 
```{r}
# get list of interested variables for plotting. drop cr, fitness, and death probability, immunity, and merozoites (which is not optimized in dual cue)
dual_var.ls <- unique(dual.cr$variable)[!unique(dual.cr$variable) %in% c("tau", "tau_cum", "cr", "cr_t", "ID", "N", "W", "M", "Mg")]

# create all possible variable combinations. 6 variable combination (4*4 with 4 same redundant and half of that) is expected
dual_var.comb <- tidyr::expand_grid(x = dual_var.ls,
                      y = dual_var.ls) %>% 
  filter(x != y) %>% 
  mutate(tmp = paste0(pmin(x, y), pmax(x, y))) %>% # eliminate same variable but different order
  slice_head(n = 1, by = tmp) %>% 
  select(-tmp)

dual_var.comb

# filter out intermediate data points in dynamic so we could have faster plotting. Also make wider for plotting
## filtering out all (none cr variable) value below 1 so negative vlues do not overwhelm disease curve. For cr, keep only value below or equal to 1
dual_cr.f <- dual.cr %>% 
  filter(variable %in% c(dual_var.ls, "cr") & case_when(variable != "cr" ~ value > 1, T~ value <=1)) %>% 
  tidyr::pivot_wider(names_from = variable, id_cols = c(time)) %>% 
  arrange(time) %>% 
  filter(row_number() %% 100 == 1) 

```

# plot
```{r}
# make path plot
## no logged on x and y
dual_curve.pl <- purrr::map2(.x = dual_var.comb$x, 
     .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = .y, y = .x, color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = .y, y = .x, color = "cr"), size = 3) +
          labs(color = "Conversion rate") + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          scale_x_continuous(labels = scientific) +
          scale_y_continuous(labels = scientific) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )

## logged on x
dual_curve_xlog.pl <- purrr::map2(
  .x = dual_var.comb$x, 
  .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = sprintf("log(%s)", .y), y = .x, color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1.5) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = sprintf("log(%s)", .y), y = .x, color = "cr"), size = 3) +
          labs(color = "Conversion rate",
            x = paste0("log10(", .y,")")) + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          scale_y_continuous(labels = scientific) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )

## logged on y
dual_curve_ylog.pl <- purrr::map2(
  .x = dual_var.comb$x, 
  .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = .y, y = sprintf("log(%s)", .x), color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1.5) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = .y, y = sprintf("log(%s)", .x), color = "cr"), size = 3) +
          labs(color = "Conversion rate",
            y = paste0("log10(", .x,")")) + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          scale_x_continuous(labels = scientific) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )


# both logged
dual_curve_log.pl <- purrr::map2(
  .x = dual_var.comb$x, 
  .y = dual_var.comb$y,
      ~{ggplot() +
          geom_path(data = dual_cr.f, aes_string(x = sprintf("log(%s)", .y), y = sprintf("log(%s)", .x), color = "cr"),
                    arrow = arrow(length = unit(c(rep(0, nrow(dual_cr.f) - 2), 0.25), "inches")),
                    size = 1.5) +
          geom_point(data = dual_cr.f %>% filter(row_number() %% 10 ==1), 
                     aes_string(x = sprintf("log(%s)", .y), y = sprintf("log(%s)", .x), color = "cr"), size = 3) +
          labs(color = "Conversion rate",
               x = paste0("log10(", .y,")"),
            y = paste0("log10(", .x,")")) + # note the labels are reversed because we set x= y and y = x above
          scale_color_viridis_c(limits = c(0, 1)) +
          theme_bw() +
          theme(plot.margin = unit(rep(0.3,4), "lines"),
                legend.position = "none")
        }
          )

# save all 4 plates
ggarrange(plotlist = dual_curve.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve1.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")

ggarrange(plotlist = dual_curve_xlog.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve2.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")

ggarrange(plotlist = dual_curve_ylog.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve3.tiff"), units = "px", width = 1000, height =1000, scale = 1.5, dpi=300,  bg = "white")

ggarrange(plotlist = dual_curve_log.pl, ncol = 2, nrow = 3, align = "hv")
ggsave(here("figures/plos-bio/dual_curve4.tiff"), units = "px", width = 1000, height = 1000, scale = 1.5, dpi=300,  bg = "white")
```


#============================================#
# real life disease curve
#============================================#
# execute code from report 10 to get final dataset
The following graphs will be made:
- R vs iRBC
- R log10 vs iRBC
- R vs iRBC log10
- R log10 vs iRBC log10

- G vs iRBC
- G log10 vs iRBC
- G vs iRBC log10
- G log10 vs iRBC log10

- R vs G
- R log10 vs G
- R vs G log10
- R log10 vs G log10

R on y-axis
```{r}
r_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = asex)) +
  geom_path(aes(y = RBC, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = asex)) +
  geom_path(aes(y = log10(RBC), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_ilog.dc <-ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(asex))) +
  geom_path(aes(y = RBC, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(asex))) +
  geom_path(aes(y = log10(RBC), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

G on y-axis
```{r}
g_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = asex)) +
  geom_path(aes(y = gam, x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_i.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = asex)) +
  geom_path(aes(y = log10(gam), x = asex, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "iRBC per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

g_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = gam, x = log10(asex))) +
  geom_path(aes(y = gam, x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "Gametocyte per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

glog_ilog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(gam), x = log10(asex))) +
  geom_path(aes(y = log10(gam), x = log10(asex), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(iRBC) per µL", y = "log10(Gametocyte) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

R vs G
```{r}
r_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = gam)) +
  geom_path(aes(y = RBC, x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_g.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = gam)) +
  geom_path(aes(y = log10(RBC), x = gam, colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "Gametocyte per µL", y = "log10(RBC per µL)", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

r_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = RBC, x = log10(gam))) +
  geom_path(aes(y = RBC, x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "RBC per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))

rlog_glog.dc <- ggplot(exp_ss.df) +
  geom_point(aes(y = log10(RBC), x = log10(gam))) +
  geom_path(aes(y = log10(RBC), x = log10(gam), colour = day, group = id), arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  theme_bw() + 
  scale_color_viridis_c(limits = c(3, 21)) +
  labs(x = "log10(Gametocyte) per µL", y = "log10(RBC) per µL", color = "Days\npost-infection") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE))
```

# plot together
```{r}
ggarrange(r_i.dc, rlog_i.dc, r_ilog.dc, rlog_ilog.dc,
          g_i.dc, glog_i.dc, g_ilog.dc, glog_ilog.dc,
          r_g.dc, rlog_g.dc, r_glog.dc, rlog_glog.dc, 
          ncol = 4, nrow = 3, align = "hv", common.legend = T)
ggsave(here("figures/plos-bio/exp_disease-curve.tiff"), units = "px", width = 2250, height = 1500, scale = 2, dpi=300,  bg = "white")
```

#===========================================#
# static competition
#===========================================#
#------- heat map ---------------#
# calculate fitness difference for 20 days
```{r}
# get dynamics
static.ls <- list.files(path = here("data/ci_static/"), pattern = "*.parquet", full.names = T)
static.ls <- lapply(static.ls, read_parquet)

# get fitness at day 20 (optimized for 20 days)
static_fitness.ls <- mclapply(static.ls, function(x){
  x %>% filter(time == 20 & variable %in% c("tau_cum1", "tau_cum2"))
})
static_fitness.df <- do.call(rbind, static_fitness.ls)

static_fitness.df <- tidyr::pivot_wider(static_fitness.df, names_from = "variable", id_cols = c("id_1", "id_2")) %>% 
  group_by(id_1, id_2) %>% 
  mutate(fitness_difference = tau_cum1-tau_cum2)
write.csv(static_fitness.df, here("data/ci_static.csv"))
```

# import and process data
```{r}
# import in dataset
static.df <- read.csv(here("data/ci_static.csv"))
ez_label <- read.csv(here("data/ez_label.csv"))

# join with labelling
static.df2 <- static.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("id_1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, label_ci_2 = label_ci), by = c("id_2" = "id_ci")) %>% 
  select(label_ci_1, label_ci_2, fitness_difference) %>% 
  mutate(label_ci_1 = gsub("log", "log10", label_ci_1),
         label_ci_2 = gsub("log", "log10", label_ci_2))

# get reverse order, which is simply invovles switching the cues around the multiplying the fitness by negative 1
static.df3 <- static.df2
names(static.df3) <- c("label_ci_2", "label_ci_1", "fitness_difference")
static.df3$fitness_difference <- static.df2$fitness_difference * -1

# join
static.df4 <- rbind(static.df2, static.df3)

# get mean
static.mean <- static.df4 %>% 
  distinct(label_ci_1, label_ci_2, .keep_all = T) %>% 
  group_by(label_ci_1) %>% 
  summarize(mean_fitness = mean(fitness_difference, na.rm = T))
  

# join static.df4 with mean for order sorting
static.df5 <- static.df4 %>% 
  left_join(static.mean, by = "label_ci_1") %>% 
  mutate(label_ci_1 = fct_reorder(label_ci_1, mean_fitness),
         label_ci_2 = fct_relevel(label_ci_2, levels(static.df5$label_ci_1)))
```

# plot
```{r}
# heatmap
static.pl1 <- ggplot(data = static.df5, aes(x = label_ci_2, y = label_ci_1, mean_fitness, fill = fitness_difference))+
  geom_tile(color = "black") +
  scale_fill_gradient2(low = "#fc8d59", high = "#4575b4", mid = "white", 
   midpoint = 0, space = "Lab", lim = c(-0.95, 0.95), name="Fitness\ndifference") +
  theme_minimal() +  
  theme(legend.position = "left",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = -1, l = -1, unit = "pt")) + 
  labs(x = "Strain 2 cue", y = "Strain 1 cue") +
  coord_fixed()

# mean 
static.pl2 <- ggplot() +
  geom_bar(data =  static.mean, aes(y = fct_reorder(label_ci_1, mean_fitness), x = mean_fitness), stat = "identity") +
  labs(y = "", x = "Mean fitness\ndifference") +
  theme_bw() +
  theme(plot.margin = margin(l = 0),
       panel.grid.major = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
    axis.text.y=element_blank(),
    axis.ticks.y=element_blank())

ggarrange(static.pl1, static.pl2, align = "hv", widths = c(1, 0.2))
ggsave(here("figures/plos-bio/static_competition_a.tiff"), units = "px", width = 2250, height = 1500, scale = 1.2, dpi=300,  bg = "white")
```

#------ effect cue perception -------#
## logging
```{r}
# get non-logged pairings
static_nolog <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")

static_log <- static.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

static_log.df <- left_join(
  select(static_nolog, cue_1, label_ci_2, log_1, None = fitness_difference),
  select(static_log, cue_1, label_ci_2, log_1, Log = fitness_difference),
  by = c("cue_1", "label_ci_2")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))
```

# combined
```{r}
static_nocomb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

static_comb <- static.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
static_comb.df <- left_join(
  select(static_nocomb, cue_1, label_ci_2, log_1, Self = fitness_difference),
  select(static_comb, cue_1, label_ci_2, log_1, Total = fitness_difference),
  by = c("cue_1", "log_1", "label_ci_2")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
```

# plot
```{r}
static_log.pl <- ggpaired(static_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = -0.1)

static_comb.pl <- ggpaired(static_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = -0.2)

ggarrange(static_log.pl, static_comb.pl, ncol = 1, nrow = 2, align = "v")
ggsave(here("figures/plos-bio/static_competition_b.tiff"), units = "px", width = 1000, height = 2000, scale = 1.2, dpi=300,  bg = "white")
```

#===========================================#
static competition dynamics
#===========================================#
#-----------G log10------------#
```{r}
# import in dynamics that contains G_log
Glog_static.ls <- list.files(here("data/ci_static"), pattern = ".*_G-i_log.*\\.parquet", full.names = T)
Glog_static.dyn <- lapply(Glog_static.ls, read_parquet)
Glog_static.dyn <- do.call(rbind, Glog_static.dyn) 
Glog_static.dyn2 <- Glog_static.dyn %>% 
  left_join(select(ez_label, id_ci, label_ci), by = c("id_1" = "id_ci"))

# id_1 represents competitor, id_2 represents G-i log
unique(Glog_static.dyn$id_1)
unique(Glog_static.dyn$id_2)

# plotting conversion rate dynamic 
Glog_cr.pl <- Glog_static.dyn2 %>% 
  filter(variable == "cr_2") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "", y = "Competing cues", fill = "Conversion rate") +
  scale_fill_viridis_c() +
  theme_bw() +
  theme(axis.title.x=element_blank())

# plotting G
Glog_G.pl <- Glog_static.dyn2 %>% 
  filter(variable == "G2") %>% 
  mutate(value = log10(value)) %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "Time (days)", y = "Competing cues", fill = "Gametocyte (log10)") +
  scale_fill_viridis_c(option = "C") +
  theme_bw()+
  theme(axis.title.x=element_blank())

# plotting I 
Glog_I.pl <- Glog_static.dyn2 %>% 
  filter(variable == "I2") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "Time (days)", y = "Competing cues", fill = "Asexual iRBC") +
  scale_fill_viridis_c(option = "A") +
  theme_bw() +
  theme(axis.title.x=element_blank())

# plotting tau
Glog_tau.pl <- Glog_static.dyn2 %>% 
  filter(variable == "tau2") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "Time (days)", y = "Competing cues", fill = "Transmission\npotential") +
  scale_fill_viridis_c(option = "B") +
  theme_bw() 

# plot together
Glog_dyn.pl <- ggarrange(Glog_cr.pl, Glog_I.pl, Glog_G.pl, Glog_tau.pl, ncol = 1, align = "v")

```

#-----------R as cue-----------#
```{r}
# import in dynamics that contains R_none
R_static.ls <- list.files(here("data/ci_static"), pattern = "^R_none.*.parquet", full.names = T)
R_static.dyn <- lapply(R_static.ls, read_parquet)
R_static.dyn <- do.call(rbind, R_static.dyn)
R_static.dyn2 <- R_static.dyn %>% 
  left_join(select(ez_label, id_ci, label_ci), by = c("id_2" = "id_ci"))

# id_1 is R, id_2 is competitor
unique(R_static.dyn$id_2)

# plotting conversion rate dynamic 
R_cr.pl <- R_static.dyn2 %>% 
  filter(variable == "cr_1") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "", y = "Competing cues", fill = "Conversion rate") +
  scale_fill_viridis_c() +
  theme_bw() +
  theme(axis.title.x=element_blank())

# plotting R
R_R.pl <- R_static.dyn2 %>% 
  filter(variable == "R") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "Time (days)", y = "Competing cues", fill = "RBC") +
  scale_fill_viridis_c(option = "C") +
  theme_bw()+
  theme(axis.title.x=element_blank())

# plotting I 
R_I.pl <- R_static.dyn2 %>% 
  filter(variable == "I1") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "Time (days)", y = "Competing cues", fill = "Asexual iRBC") +
  scale_fill_viridis_c(option = "A") +
  theme_bw() +
  theme(axis.title.x=element_blank())

# plotting tau
R_tau.pl <- R_static.dyn2 %>% 
  filter(variable == "tau1") %>% 
ggplot(aes(x = time, y = label_ci, fill = value)) +
  geom_raster() +
  xlim(1, 14) +
  labs(x = "Time (days)", y = "Competing cues", fill = "Transmission\npotential") +
  scale_fill_viridis_c(option = "B") +
  theme_bw() 

R_dyn.pl <- ggarrange(R_cr.pl, R_I.pl, R_R.pl, R_tau.pl, ncol = 1, align = "v")
```

# plot together
```{r}
ggarrange(Glog_dyn.pl, R_dyn.pl, ncol = 2, labels = c("A", "B"), align = "h")
ggsave(here("figures/plos-bio/static_dyn.tiff"), units = "px", width = 2200, height = 2000, scale = 1.2, dpi=300,  bg = "white")
```


#===========================================#
# invasion analysis
#===========================================#
# import in data (already 20 days )
```{r}
invade.df <- read.csv(here("data/ci_invasion.csv"))
```


# process data for invasion matrix
```{r}
invade.mat <- invade.df %>% 
  group_by(V1 = pmin(mut_id, res_id), V2 = pmax(mut_id, res_id)) %>% # group by cue competition, irregardless of order
  mutate(id_alt = paste0(V1, V2),
         invade = case_when(
           fitness > 0 ~ "invade",
           fitness < 0 ~ "not invade"
         )) %>% 
  group_by(id_alt) %>% 
  mutate(
    mut_is_V1 = case_when(
    mut_id == V1 ~ "V1_invade",
    mut_id != V1 ~ "V1_invaded")) %>% 
  arrange(id_alt) %>% 
  select(fitness, V1, V2, id_alt, invade, mut_is_V1) %>% 
  tidyr::pivot_wider(names_from = mut_is_V1, values_from = fitness) %>% 
  group_by(id_alt) %>% 
  mutate(V1_invade2 = gsub("NA", "", paste0(V1_invade, collapse = "")),
         V1_invaded2 = gsub("NA", "", paste0(V1_invaded, collapse = ""))) %>% 
  distinct(id_alt, .keep_all = T) %>% 
  mutate(
    category = case_when(
    V1_invade2 > 0 & V1_invaded2 > 0 ~ "Mutual invasion",
    V1_invade2 > 0 & V1_invaded2 < 0 ~ "Only strain 1 invasion",
    V1_invade2 < 0 & V1_invaded2 > 0 ~ "Only strain 2 invasion",
    V1_invade2 < 0 & V1_invaded2 < 0 ~ "Mutual non-invasion"
  )) %>% 
  select(V1, V2, invasion = category)

invade.df %>% filter(mut_id == "G-i_none")
invade.df %>% filter(res_id == "G-i_none")
```


```{r}
# for plotting, need to get all same cue vs same cue, which we will set to NA
invade.NA <- cbind.data.frame(`V1` = unique(invade.mat$V1),
      `V2` = unique(invade.mat$V1),
      invasion = NA)

invade.mat2 <- rbind(invade.mat, invade.NA)

# get label
invade.mat3 <- invade.mat2 %>% 
  left_join(select(ez_label, id_ci, V1_label = label_ci), by = c("V1" = "id_ci")) %>% 
  left_join(select(ez_label, id_ci, V2_label = label_ci), by = c("V2" = "id_ci")) %>% 
  mutate(V1_label = gsub("log", "log10", V1_label),
                                      V2_label = gsub("log", "log10", V2_label))


invade.mat4 <- rbind(
  select(invade.mat3, V1_label, V2_label, invasion),
  select(invade.mat3, V2_label = V1_label, V1_label = V2_label) %>% mutate(invasion = NA)) %>%
  mutate(
    invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  )) %>% 
  filter(!is.na(V1_label))

invade.mat4$V1_label <- factor(invade.mat4$V1_label, levels =  c("G log10", "G", "G1+G2", "I log10", "I", "I+Ig log10", "I+Ig", "I1+I2 log10", "I1+I2", "Ig log10", "Ig", "Ig1+Ig2", "R log10", "R", "sum log10", "sum"))
invade.mat4$V2_label <- factor(invade.mat4$V2_label, levels =  c("G", "G1+G2", "I log10", "I", "I+Ig log10", "I+Ig", "I1+I2 log10", "I1+I2", "Ig log10", "Ig", "Ig1+Ig2", "R log10", "R", "sum log10", "sum", "G log10"))
```


# plot invasion matrix
```{r}
invasion.pl1 <- ggplot(data = invade.mat4, aes(x = V2_label, y = V1_label, fill = invasion_2)) +
  geom_tile(color = "black") +
  theme_minimal() +  
  theme(legend.position = "right",
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  text = element_text(size = 15),
  axis.ticks = element_blank(),
  plot.margin=margin(r = 0)) + 
  labs(fill = "Invasibility", x = "Competing cue", y = "Reference cue") +
  scale_x_discrete(limits = rev) +
  scale_y_discrete(limits = rev) +
  scale_fill_manual(values = c("Competitive exclusion\nof another cue" = "#4575b4",
                               "Competitive exclusion\nby another cue" = "#fc8d59",
                               "Mutual invasion" = "#fee090"),
                    na.value = "white") +
  theme(legend.position = "none")
```

# create summary bar chart
```{r}
# create a stacked barchart for summary
## filter out na
invade.matalt <- invade.mat3 %>% na.exclude()

# get frquency from both sides. Note when grouping for V2, from the perspective of cue 2, scenarrio when strain 2 invade = strain 1 invade
invade.matalt1 <- invade.matalt %>% group_by(V1_label, invasion) %>% 
  summarize(frequency_1 = n())

invade.matalt2 <- invade.matalt %>%
  mutate(invasion_alt = case_when(
    invasion == "Only strain 1 invasion" ~ "Only strain 2 invasion",
    invasion == "Only strain 2 invasion" ~ "Only strain 1 invasion",
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Mutual non-invasion" ~ "Mutual non-invasion"
  )) %>% 
  group_by(V2_label, invasion_alt) %>% 
  summarize(frequency_2 = n())     

# full join and sum. has confirmed all of them add up to 14 
invade.matalt3 <- full_join(invade.matalt1, invade.matalt2, by = c("V1_label" = "V2_label", "invasion" = "invasion_alt"))

invade.matalt3[is.na(invade.matalt3)] <- 0
invade.matalt4 <- invade.matalt3 %>% 
  mutate(freq = frequency_1 + frequency_2) %>% 
  mutate(temp = case_when(
    invasion == "Only strain 1 invasion" ~ freq
  )) %>% 
  group_by(V1_label) %>% 
  mutate(invade_1_freq = max(temp, na.rm = T)) %>% 
  mutate(invasion_2 = case_when(
    invasion == "Mutual invasion" ~ "Mutual invasion",
    invasion == "Only strain 1 invasion" ~ "Competitive exclusion\nof another cue",
    invasion == "Only strain 2 invasion" ~ "Competitive exclusion\nby another cue"
  ))
```



```{r}
invasion.pl2 <- ggplot() +
  geom_bar(data = invade.matalt4, aes(x = freq, y = reorder(V1_label, invade_1_freq), fill = forcats::fct_rev(factor(invasion_2, levels = c("Competitive exclusion\nof another cue", "Competitive exclusion\nby another cue", "Mutual invasion", "Mutual non-invasion")))), stat = "identity") +
  labs(x = "Frequency", fill = "Invasibility", y = "Cue") +
  theme_bw()  +
  scale_fill_manual(values = c("Competitive exclusion\nof another cue" = "#4575b4",
                               "Competitive exclusion\nby another cue" = "#fc8d59",
                               "Mutual invasion" = "#fee090")) +
  theme(text = element_text(size = 15))
```

# plot together
```{r}
ggarrange(invasion.pl1, invasion.pl2, align = "h", common.legend = T, widths = c(2, 1))
ggsave(here("figures/plos-bio/invasion_a.tiff"), units = "px", width = 2250, height = 1100, scale = 1.4, dpi=300,  bg = "white")
```

#---------------- invasion pairwise comparison-----------------#
## proces data
```{r}
# join invade df with label because I am lazy
invade.df2 <- invade.df %>% 
  left_join(select(ez_label, id_ci, label_ci_1 = label_ci), by = c("mut_id" = "id_ci"))

invade.df2 
```

# log
```{r}
# get non-logged pairings
invade_nolog <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "none")


invade_log <- invade.df2 %>% 
  mutate(cue_1 = trimws(gsub("\\ .*", "", label_ci_1)),
         log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none")) %>% 
  filter(log_1 == "log")

invade_log.df <- left_join(
  select(invade_nolog, cue_1, res_id, log_1, None = fitness),
  select(invade_log, cue_1, res_id, log_1, Log = fitness),
  by = c("cue_1", "res_id")) %>% 
  filter(!is.na(None) & !is.na(Log)) %>% 
  mutate(classification = ifelse(Log > None, "Logged better", "Not logged better"))

invade_log
```

# combined
```{r}
invade_nocomb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "none")

invade_comb <- invade.df2 %>% 
  mutate(cue_1 = ifelse(
    str_detect(label_ci_1, "sum"), "I+Ig",
    trimws(gsub("+1.*|\\ log", "", label_ci_1))
  ),
  log_1 = case_when(
           str_detect(label_ci_1, "log") ~ "log",
           str_detect(label_ci_1, "log", negate = T) ~ "none"),
    comb_1 = case_when(
    str_detect(label_ci_1, "1+|sum") ~ "comb",
    str_detect(label_ci_1, "1+|sum", negate = T) ~ "none" 
  )) %>% 
  filter(comb_1 == "comb")
  
invade_comb.df <- left_join(
  select(invade_nocomb, cue_1, res_id, log_1, Self = fitness),
  select(invade_comb, cue_1, res_id, log_1, Total = fitness),
  by = c("cue_1", "log_1", "res_id")) %>% 
  filter(!is.na(Total) & !is.na(Self)) %>% 
  mutate(classification = ifelse(Total > Self, "Total better", "Self better"))
invade_comb.df
```

# plot
```{r}
invade_log.pl <- ggpaired(invade_log.df, cond1 = "None", cond2 = "Log", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Not logged better" = "#fc8d59", "Logged better" = "#4575b4"))+
  stat_compare_means(paired = TRUE, hjust = -0)

invade_comb.pl <- ggpaired(invade_comb.df, cond1 = "Total", cond2 = "Self", line.color = "classification", alpha = 0.5) +
  labs(x = "Cue perception", y = "Fitness difference", color = "Effect") +
  scale_color_manual(values = c("Total better" = "#fc8d59", "Self better" = "#4575b4"))+
  stat_compare_means(paired = TRUE, hjust = -0)

ggarrange(invade_log.pl, invade_comb.pl, align = "h", ncol = 2)
ggsave(here("figures/plos-bio/invasion_b.tiff"), units = "px", width = 2250, height = 900, scale = 1.2, dpi=300,  bg = "white")
```


#===========================================#
# Cue performance across single, co-infection, static, and invasion
#===========================================#
```{r}
# import in all the ranks
si_opt.df <- read.csv(here("data/si_opt.csv"))
ci_opt.df <- read.csv(here("data/ci_opt.csv"))
static.df <- read.csv(here("data/ci_static.csv"))
invasion.df <- read.csv(here("data/ci_invasion.csv"))

# calculate mean fitness
static.mean ## already calculated
invasion.mean <- rbind(select(invasion.df, cue = mut_id, fitness),
      select(invasion.df %>% mutate(fitness2 = fitness * -1), cue = res_id, fitness = fitness2)) %>% 
  group_by(cue) %>% 
  summarize(mean_fitness = mean(fitness)) %>% 
  arrange(desc(mean_fitness))

# calculate ranks
si.rank <- si_opt.df %>% 
  left_join(select(ez_label, id_ci, id_si, label = ez_label_si), by = c("id" = "id_si")) %>% 
  mutate(rank = dense_rank(-fitness_20),
         model = "Single infection") %>% 
  select(rank, model, id = id_ci, label)

ci.rank <- ci_opt.df %>% 
  left_join(select(ci_fitness.df, value, label), by = "label") %>% 
  left_join(select(ez_label, id_ci, id_si, ez_label), by = c("id" = "id_ci")) %>% 
  mutate(rank = rank(-value),
         model = "Co-infection") %>% 
  select(rank, model, id, label = ez_label)

static.rank <- static.mean %>% 
  mutate(label_ci = gsub("log10", "log", label_ci_1)) %>% 
  left_join(select(ez_label, id_ci, label_ci, ez_label), by = c("label_ci" = "label_ci")) %>% 
  mutate(rank = rank(-mean_fitness),
         model = "Static mixed genotype") %>% 
  select(rank, model, id = id_ci, label = ez_label)

invasion.rank <- invasion.mean %>% 
  left_join(select(ez_label, id_ci, ez_label), by = c("cue" = "id_ci")) %>% 
  mutate(rank = rank(-mean_fitness),
         model = "Invasive mixed genotype") %>% 
  select(rank, model, id = cue, label = ez_label) %>% 
  mutate(label = gsub("\n", " ", paste0("   ", label)))

static.rank
# concatenate
fitness.rank <- rbind(
  si.rank, ci.rank, static.rank, invasion.rank
) %>% 
  mutate(model = fct_relevel(model, c("Single infection", "Co-infection", "Static mixed genotype", "Invasive mixed genotype")))

# highlight the good ones
fitness.rank_good <- fitness.rank %>% 
  filter(id %in% c("Ig-i_log", "I-i+Ig-i_log", "sum_log", "G-i_log"))
```

# plot
```{r}
library(ggbump)

ggplot() +
  geom_bump(data = fitness.rank, aes(x = model, y = rank, group = id), color = "grey") +
  geom_point(data = fitness.rank, aes(x = model, y = rank, group = id), color = "grey", size = 3) +
  geom_bump(data = fitness.rank_good, aes(x = model, y = rank, group = id, color = id)) +
  geom_point(data = fitness.rank_good, aes(x = model, y = rank, group = id, color = id), size = 3) +
  geom_text(data = invasion.rank, aes(x = model, y = rank, label = label), size = 3.5, hjust = 0, inherit.aes = F) +
  scale_color_viridis_d() +
  theme_minimal() +
  coord_cartesian(clip = "off") +
  scale_y_reverse() +
  expand_limits(x = 5.5) +
  theme(legend.position = "none") +
  labs(x = "Model", y = "Mean fitness rank")
ggsave(here("figures/plos-bio/fitness_rank.tiff"), units = "px", width = 2250, height = 1500, scale = 1, dpi=300,  bg = "white")
```


#-=====================#
# Partitioning best cue
#=====================-#
#------- single infection -----------#
# redo some optimization (lower fitness in no R than default)
```{r}
source(here("functions/chabaudi_si_clean_R.R"))
source(here("functions/chabaudi_si_clean_N.R"))
# I none
cl <- makeCluster(detectCores()); setDefaultCluster(cl = cl)
I_no_R <- optimParallel(
    par = rep(0.5,4), # start at 0.5x4
    fn = chabaudi_si_clean_R, 
    control = list(trace = 6, fnscale = -1),
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  seq(0, 6*(10^6), by = (6*(10^6))/5000),
    cue = "I",
    log_cue = "none",
    solver = "vode")
stopCluster(cl)
# 0.144021 -43.1046 2030.27 -524.686 
# 8.69589
```

# import and process data
```{r}
# import in data
si_partition.ls <- list.files(path = here("data/partition/si/"), pattern = "*.csv", full.names = T)
si_partition.ls <- lapply(si_partition.ls, read.csv)
si_partition.df <- do.call(rbind, si_partition.ls)

# combine with si fitness (default)
si_partition.df <- si_partition.df %>% left_join(select(si_fitness.df, id, fitness = value), by = "id")

# make longer
si_partition.df2 <- tidyr::pivot_longer(si_partition.df, c(fitness_R, fitness_N, fitness_W, fitness))

# calculate coefficient of variation. Also rename
si_partition.df2 <- si_partition.df2 %>% 
  group_by(name) %>% 
  mutate(cv = sd(value)/mean(value)*100,
         mean = mean(value),
         category = case_when(
           name == "fitness_R" ~ "No RBC limitation",
           name == "fitness_W" ~ "No targeted immunity",
           name == "fitness_N" ~ "No indiscriminate\nimmunity",
           name == "fitness" ~ "Default"
         ))

```

# plot
```{r}
library(ungeviz)
# raw fitness
si_partition.pl1 <- ggplot() +
  geom_vpline(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = mean, group = category, color = category), show.legend = F, size = 1) +
  geom_point(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value), size = 2, alpha = 0.7) +
  geom_line(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = value, group = id), alpha = 0.2) +
  labs(x = "Fitness", y = "Conditions") +
  theme_bw()

# coefficient of variation
si_partition.pl2 <- ggplot() +
  geom_bar(data = si_partition.df2, aes(y = fct_reorder(category, mean), x = cv), stat = "identity") +
  labs(x = "Coefficient of\nvariation (%)", y = "") +
  theme_bw() +
  theme(axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank())

si_partition.pl <- ggarrange(si_partition.pl1, si_partition.pl2, widths = c(1, 0.3), align = "h")
si_partition.pl

ggsave(here("figures/plos-bio/partition_fitness.tiff"), width = 7, height = 4)
```

#------- consequences of no targeted immunity ------------#
# get dynamics of no targeted immunity
```{r}
get_dyn <- function(df){
  
  source(here("functions/chabaudi_si_clean_W.R"))
  id <- df$id
  cue <- df$cue
  log <- df$log
  par <- c(df$var_W1, df$var_W2, df$var_W3, df$var_W4)
  cue_range <- seq(df$low, df$high, by = df$by)
  
  # get dynamics
  dyn <- chabaudi_si_clean_W(
    parameters_cr = par,
    immunity = "tsukushi",
    parameters = parameters_tsukushi,
    time_range = seq(0, 20, by = 1e-3),
    cue_range =  cue_range,
    cue = cue,
    log_cue = log,
    solver = "vode",
    dyn = T
  )
  
  # combine
  dyn2 <- cbind(dyn, id = id, cue = cue, log = log)
  
  write_parquet(dyn2, paste0(here("data/partition/si_dyn/"), id, "_noW_dyn.parquet"))
  
}
```

# get df to run
```{r}
# join with cue_range
cue_range_si.df <- read.csv(here("data/cue_range_si.csv"))
si_partition.df3 <- si_partition.df %>% left_join(select(cue_range_si.df, low, high, by, id), "id")

# lapply loop
si_partition.ls <- split(si_partition.df3, seq(nrow(si_partition.df3)))
mclapply(si_partition.ls, get_dyn)
```

# process dataframe
```{r}
# import in dataframe
no_W.ls <- list.files(here("data/partition/si_dyn/"), pattern = "*noW_dyn.parquet", full.names = T)
no_W.df <- lapply(no_W.ls, read_parquet)
no_W.df <- do.call(rbind, no_W.df)

# combine with ez label
ez_label <- read.csv(here("data/ez_label.csv"))
no_W.df <- left_join(no_W.df, ez_label, by = c("id" = "id_si"))

# get conversion rate 
no_W.cr <- no_W.df %>% filter(variable == "cr")
no_W.I <- no_W.df %>% filter(variable == "I")

# get default conversion rate dynamics
si_dyn.df <- left_join(si_dyn.df, ez_label, by = c("id" = "id_si"))
si_dyn.cr <- si_dyn.df %>% filter(variable == "cr")
si_dyn.I <- si_dyn.df %>% filter(variable == "I")
```

# plot conversion rate
mechanism: targeted immunity led to lower parasite density in the initial stages, which prevents parasites from making the switch from no conversion rate to high conversion rate. When parsite density undergoes drastic increase at the beginning due to lower immunity, this presents a higher degree of signal that allows parasite to make the switch appropriately
```{r}
partition_cr.pl <- ggplot() +
  geom_line(data = no_W.cr, aes(x = time, y= value, color = "No targeted immunity")) +
  geom_line(data = si_dyn.cr, aes(x = time, y= value, color = "Default")) +
  facet_wrap(~ez_label_si, ncol = 5) +
  xlim(0, 20) +
  geom_vline(xintercept = 7) +
  labs(x = "Time (days)", y = "Conversion rate", color = "Condition") +
  theme_bw()

no_W.cr
```

#----- cue state --------------#

# function to get cue states
```{r}
# function to get cue states
get_cue_state <- function(df){
  cue <- trimws(gsub("_log|_none", "", unique(df$id)))
  if(cue != "I+Ig"){
  df2 <- df %>% filter(variable == cue)
  if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  if(cue == "I+Ig"){
    df2 <- df %>% filter(variable %in% c("I", "Ig")) %>% 
      group_by(time) %>% 
      mutate(value = sum(value))
    
    if(str_detect(unique(df$id), "log")){
    df2 <- df2 %>% 
      mutate(value = log10(value))
  }
  }
  
  df2$value[df2$value == -Inf] <- 0
  
  write_parquet(df2, paste0(here("data/partition/si_default_state/"), unique(df$id), "_noW_state.parquet"))
}
```

# run function
```{r}
# split dynamics based on id
no_W.split <- split(no_W.df, no_W.df$id)

# run function
mclapply(no_W.split, get_cue_state)

# get dataframe
no_W.state <- list.files(here("data/partition/si_state/"), pattern = "*.parquet", full.names = T)
no_W.state <- lapply(no_W.state, read_parquet)
no_W.state <- do.call(rbind, no_W.state)
no_W.state$value[no_W.state$value < 0] <- 0

# get same for si infection
default.split <- split(si_dyn.df, si_dyn.df$id)
mclapply(default.split, get_cue_state)
default.state <- list.files(here("data/partition/si_default_state/"), pattern = "*.parquet", full.names = T)
default.state <- lapply(default.state, read_parquet)
default.state <- do.call(rbind, default.state)
default.state$value[default.state$value < 0] <- 0

# manually correct non-logging
I_Ig.corr <- no_W.state %>% filter(id == "I+Ig_log") %>% 
  mutate(value = log10(value))
I_Ig.corr$value[I_Ig.corr$value < 0] <- 0

no_W.state2 <- no_W.state %>% filter(id != "I+Ig_log")
no_W.state2 <- no_W.state2 %>% rbind(no_W.state2, I_Ig.corr)
```

# plot
absence of targeted immunity led to drastic increase in parasite density in early phases of infection. This produces high signal intensity for parasite and host-based cues, especially non-logged ones, which allows for state differentation. While this can be viewed as a modelling artifiact, it should be noted that the logged cues seldom changed as these changes in early infection did little to alter the actual early signal intensity sensed by the parasite. In an environment where there is heterogeneity in host response, and thus, signal, logging allows for parasites to adapt optimal strategy whereas non-logged cues must contend with sensitivity to immunity.
```{r}
# function to individually plot stuff
plot_state <- function(df1, df2){
  
  # plot state dynamics
  state_pl <- ggplot() +
  geom_line(data = df1, aes(x = time, y = value, color = name, group = name)) +
  facet_wrap(~ez_label_si, scales = "free") +
  xlim(1,20) +
  theme_bw() +
  theme(legend.position="none") +
  labs(x = "", y = "Cue") +
  scale_y_continuous(labels = function(x) format(x, scientific = TRUE)) +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59"))
  
  # plot conversion rate dynamics
  cr_pl <- ggplot() +
  geom_raster(data = df2, aes(x = time, y = name, fill = value)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.position = "none") +
    scale_fill_viridis_c(lim = c(0, 1))
  
  # arrange
  ggarrange(state_pl, cr_pl, ncol = 1, nrow = 2, align = "v", heights = c(1, 0.4))
  ggsave(paste0(here("figures/plos-bio/partition/"), unique(df1$id), ".tiff"), width = 4.5, height = 3.5)
}
```

# split
```{r}
# combine state
noW_default.state <- left_join(
  select(no_W.state2, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(default.state %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))

noW_default.state2 <- tidyr::pivot_longer(noW_default.state, c(`No targeted\nimmunity`, `Default`))
# combine conversion raster
noW_default.cr <- left_join(
  select(no_W.cr, time, `No targeted\nimmunity` = value, id, ez_label_si), 
  select(si_dyn.cr %>% filter(time <= 20), time, `Default` = value, id, ez_label_si), by = c("time", "id", "ez_label_si"))
noW_default.cr2 <- tidyr::pivot_longer(noW_default.cr, c(`No targeted\nimmunity`, `Default`))

# split
noW_default_state.ls <- split(noW_default.state2, noW_default.state2$id)
noW_default_cr.ls <- split(noW_default.cr2, noW_default.cr2$id)

# run function
mapply(plot_state, noW_default_state.ls, noW_default_cr.ls)
```

#-------- reaction norms of default vs optimized ------------#
# get reaction norm and rug data
```{r}
source(here("functions/par_to_df.R"))

# Gametocyte
g_log.rn <- par_to_df(par = c(1.211521,	-3.936778,	-1.312944,	-1.285713), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))
g_log.rn2 <- par_to_df(par = c(1.393860539,	-4.253007616,	-0.313947029,	-2.000857344), cue_range = seq(0, log10(6*(10^4)), by = (log10(6*(10^4)))/5000))

g.rn <- par_to_df(par = c(0.04061288,	-9.31445958,	74.13015506,	-431.5984364), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))
g.rn2 <- par_to_df(par = c(0.541729073,	-3.904616443,	0.87487412,	-0.694177021), cue_range = seq(0, 6*(10^4), by = (6*(10^4))/5000))

# I+Ig
I_Ig_log.rn <- par_to_df(par = c(3.594042,	4.157744,	-13.530672,	2.599905), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))
I_Ig_log.rn2 <- par_to_df(par = c(63.71893822,	-87.77671601,	-56.55475514,	-66.02209549), cue_range = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/5000))

I_Ig.rn <- par_to_df(par = c(0.3159297,	-46.1104558,	1250.752908,	-6.1982093), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))
I_Ig.rn2 <- par_to_df(par = c(0.731982784,	-21.69799449,	149.7841876,	17.02551769), cue_range = seq(0, 6*(10^6), by = (6*(10^6))/5000))

# convert log to non-logged scale
g_log.rn$cue_range <- 10^(g_log.rn$cue_range)
g_log.rn2$cue_range <- 10^(g_log.rn2$cue_range)
I_Ig_log.rn$cue_range <- 10^(I_Ig_log.rn$cue_range)
I_Ig_log.rn2$cue_range <- 10^(I_Ig_log.rn2$cue_range)

# get rug
g_log.rug <- default.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  select(label_si, value)

g_log.rug2 <- no_W.state %>% 
  filter(label_si == "G log") %>% 
  mutate(value = 10^value) %>% 
  filter(value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig_log.rug <- default.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

I_Ig_log.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig log") %>% 
  select(label_si, value)

g.rug <- default.state %>% 
  filter(label_si == "G") %>% 
  select(label_si, value)

g.rug2 <- no_W.state %>% 
  filter(label_si == "G" & value <= 6*(10^4)) %>% 
  select(label_si, value)

I_Ig.rug <- default.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

I_Ig.rug2 <- no_W.state %>% 
  filter(label_si == "I+Ig") %>% 
  select(label_si, value)

# get rug limits
rug_lim <- rbind(g_log.rug,
                 g_log.rug2,
                 I_Ig_log.rug,
                 I_Ig_log.rug2,
                 g.rug,
                 g.rug2,
                 I_Ig.rug,
                 I_Ig.rug2) %>% 
  group_by(label_si) %>% 
  summarize(max = max(hablar::s(value), na.rm = T),
            min = min(hablar::s(value), na.rm = T))

# combine and filter
rn <- rbind(
  cbind(g_log.rn, label_si = "G log", condition = "Default"),
  cbind(g_log.rn2, label_si = "G log", condition = "No targeted\nimmunity"),
  cbind(g.rn, label_si = "G", condition = "Default"),
  cbind(g.rn2, label_si = "G", condition = "No targeted\nimmunity"),
  cbind(I_Ig_log.rn, label_si = "I+Ig log", condition = "Default"),
  cbind(I_Ig_log.rn2, label_si = "I+Ig log", condition = "No targeted\nimmunity"),
  cbind(I_Ig.rn, label_si = "I+Ig", condition = "Default"),
  cbind(I_Ig.rn2, label_si = "I+Ig", condition = "No targeted\nimmunity")
) %>% 
  left_join(rug_lim, by = "label_si") %>% 
  group_by(label_si) %>% 
  filter(cue_range <= max & cue_range >= min)

# combine rug
rug <- rbind(cbind(g_log.rug, condition = "Default"),
             cbind(g_log.rug2, condition = "No targeted\nimmunity"),
             cbind(g.rug, condition = "Default"),
             cbind(g.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig_log.rug, condition = "Default"),
             cbind(I_Ig_log.rug2, condition = "No targeted\nimmunity"),
             cbind(I_Ig.rug, condition = "Default"),
             cbind(I_Ig.rug2, condition = "No targeted\nimmunity"))

# cobine with ezlabel
rn2 <- rn %>% left_join(ez_label, by = "label_si")
rug2 <- rug %>% left_join(ez_label, by = "label_si")

# filter rug
default.rug <- rug2 %>% filter(condition == "Default")
no.rug <- rug2 %>% filter(condition == "No targeted\nimmunity")
```

# plot
```{r}
ggplot() +
  geom_line(data = rn2, aes(x = cue_range, y = cr, color = condition)) +
  geom_rug(data = default.rug, aes(x = value, color = condition), sides = "b") +
  geom_rug(data = no.rug, aes(x = value, color = condition), sides = "t") +
  facet_wrap(~fct_relevel(ez_label_si, c("Gametocyte log10", "Gametocyte", "Asexual&sexual\niRBC log10", "Asexual&sexual iRBC")), scales = "free_x") +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE)) +
  theme_bw() +
  scale_color_manual(values =c("Default" = "#4575b4", "No targeted\nimmunity" = "#fc8d59")) +
  ylim(0, 1.1) +
  labs(x = "Cue range", y = "Conversion rate", color = "Condition")

ggsave(here("figures/plos-bio/partition_rn.tiff"), width = 7.5, height = 6)
```

# get conversion rate legend
```{r}
noW_default.cr %>% filter(id == "G_log") %>% 
ggplot() +
  geom_raster(aes(x = time, y = id, fill = Default)) +
  xlim(1,20) +
  theme_bw() +
    labs(x = "Time (days)",
         fill = "Conversion rate") +
  theme(axis.title.y=element_blank(),
        axis.ticks.y=element_blank(),) +
    scale_fill_viridis_c(lim = c(0, 1))

ggsave(here("figures/plos-bio/cr_legend.tiff"))
```

#================================#
# Disease curves for single, co-infection, and invasion
#===============================#
# get data for disease curves
```{r}
# single infection dynamics
si_dyn.df <- read_parquet(here("data/si_dyn/si_dyn_30.parquet")) 

# co-infection dynamics (mon-cue)
ci_dyn.df <- read_parquet(here("data/ci_dyn/ci_dyn.parquet"))

# dual cue dynamics
dual_dyn.df <- read_parquet(here("data/dual_cue_dyn/dual_cue_dyn.parquet"))
```

#------- single cue comparison ---------------#
# process data
```{r}
# get classification
si_cue.dv <- si_fitness.df %>% 
  mutate(classification = case_when(
    value > 9.2 ~ "High-performing",
    value <= 9.2 ~ "Poor-performing"
  ))

# process dynamics -> turn skinny
si_dc.df <- si_dyn.df %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  mutate(total = I+Ig)

# join with classificaiton
si_dc.df2 <- si_dc.df %>% left_join(select(si_cue.dv, id, classification), by = "id")
si_cue.dv
# split into top erforming and poor-performing cues
si_dc.high <- si_dc.df2 %>% filter(classification == "High-performing")
si_dc.poor <- si_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
si_dc.high <- si_dc.high %>% left_join(ez_label %>% distinct(label_si, .keep_all = T), by = c("id" = "id_si"))

#write_parquet(si_dc.high, here("data/disease_curve/si_dc_high.parquet"))
#write_parquet(si_dc.poor, here("data/disease_curve/si_dc_poor.parquet"))
```

#--------- dual cue --------------------------#
# process data
```{r}
# turn skinny
dual_dc.df <- dual_dyn.df %>% 
  mutate(label_alt = paste(label, "+" , label_b)) %>% 
  select(label_alt, time, variable, value) %>% 
  filter(variable == "I" | variable == "Ig" | variable == "R") %>% 
  distinct(label_alt, time, variable, .keep_all = T)

dual_dyn.df

dual_dc.df2 <- dual_dc.df %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value, id_cols = c(time, label_alt)) %>%
  mutate(total = I+Ig)

#write_parquet(dual_dc.df2, here("data/disease_curve/dual_dc.parquet"))

# good dual cue -> list of good performing dual cues that encompass wide variety of cues
selected_dual_cue <- c("R log + I log", "R + Ig log", "G log + I log", "G log + Ig log", "Ig + I log")
bad_dual_cue <- c("G + I", "R + Ig", "R log + Ig", "G + R", "G + R log", "G + Ig", "Ig + I", "R + I", "R log + I")

# get classification -> R log10 + I log10 as the only good one
dual_dc.high <- dual_dc.df2 %>% filter(label_alt %in% selected_dual_cue) %>% 
  mutate(label_alt = gsub("log", "log10", label_alt))
dual_dc.poor <- dual_dc.df2 %>% filter(label_alt %in% bad_dual_cue) %>% 
  mutate(label_alt = gsub("log", "log10", label_alt))
#write_parquet(dual_dc.high, here("data/disease_curve/dual_dc_high.parquet"))
#write_parquet(dual_dc.poor, here("data/disease_curve/dual_dc_poor.parquet"))

```

#------- plotting single and dual cue disease map together-------#
# plot
```{r}
# import single infection data
si_dc.poor <- read_parquet(here("data/disease_curve/si_dc_poor.parquet"))
si_dc.high <- read_parquet(here("data/disease_curve/si_dc_high.parquet"))

# import dual cue data
dual_dc.high <- read_parquet(here("data/disease_curve/dual_dc_high.parquet"))
dual_dc.high2 <- dual_dc.high %>% 
  filter(label_alt == "R log10 + I log10")

# plot background dynamics
si_dc.pre <- ggplot() +
  geom_path(data = si_dc.poor, aes(x= total, y = R, group = id), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Single infection\ngood performing cues", x = "Asexual & sexual iRBC per µL", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

# plot colored dynamics
## combine single and dual cue dynamics
si_dc.high2 <- rbind(select(si_dc.high, ez_label, total, R, time),
      select(dual_dc.high2, ez_label = label_alt, total, R, time))

si_dc.pl <- si_dc.pre +
  geom_point(data = si_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = si_dc.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fdcb44", "#91bfdb","black", "#fc8d59"))  +
  theme(legend.position = "top") +
  labs(color = "Cues") +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```

#---------- co-infection monocue -------------#
Note that we are plotting only single strain to show that on a single strain level, co-infection leads to larger area than single infection!
# process data
```{r}
# get relevent variables. 
ci_dc.df <- ci_dyn.df %>% 
  filter(variable %in% c("I1", "Ig1", "R"))

# morph into skinny format
ci_dc.df <- tidyr::pivot_wider(ci_dc.df, names_from = variable, values_from = value, id_cols = c(time, label)) %>% 
  mutate(total = I1+Ig1)

# good cue bad cue
ci_cue.dv <- ci_fitness.df %>% 
  mutate(classification = case_when(
    value > 2.25 ~ "High-performing",
    value <= 2.25 ~ "Poor-performing"
  ))

# join with classificaiton
ci_dc.df2 <- ci_dc.df %>% left_join(ci_cue.dv, by = "label")

# split into top erforming and poor-performing cues
ci_dc.high <- ci_dc.df2 %>% filter(classification == "High-performing")
ci_dc.poor <- ci_dc.df2 %>% filter(classification == "Poor-performing")

# join high performing with label
ci_dc.high2 <- ci_dc.high %>% left_join(ez_label, by = c("label" = "label_ci"))

write_parquet(ci_dc.high2, here("data/disease_curve/ci_dc_high.parquet"))
write_parquet(ci_dc.poor, here("data/disease_curve/ci_dc_poor.parquet"))
```

# plot
```{r}
ci_dc.poor <- read_parquet(here("data/disease_curve/ci_dc_poor.parquet"))
ci_dc.high2 <- read_parquet(here("data/disease_curve/ci_dc_high.parquet"))

# plot
ci_dc.pre <- ggplot() +
  geom_path(data = ci_dc.poor, aes(x= total, y = R, group = label), color = "dark grey", arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  labs(color = "Cues", x = "Asexual & sexual iRBC per µL (per strain)", y = "RBC per µL") +
  theme_bw()+ scale_y_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1)) +
  guides(shape = FALSE)

ci_dc.pl <- ci_dc.pre +
  geom_point(data = ci_dc.high2 %>% filter(row_number() %% 1000 ==0), aes(x = total, y = R, color = ez_label, shape = ez_label), size = 3) +
  geom_path(data = ci_dc.high2, aes(x= total, y = R, group = ez_label, color = ez_label), size = 1, arrow = arrow(type = "closed", angle = 10, length = unit(0.2, "inches"))) +
  scale_color_manual(values=c( "#4575b4", "#fc8d59", "#fdcb44", "#91bfdb")) +
  theme(legend.position = "top") +
  guides(color=guide_legend(nrow=2,byrow=TRUE)) +
  scale_x_continuous(labels = function(x) format(x, scientific = TRUE, accuracy = 0.1))

```

#--------- quantifying disease curve area ------------#
# function to calculate area between sets of points -> from https://stackoverflow.com/questions/3672260/area-covered-by-a-point-cloud-with-r
```{r}
library(splancs)
cha<-function(df){
  x <- df$total
  y <- df$R
chull(x,y)->i
return(areapl(cbind(x[i],y[i])))
}
```

# loop to get area: single infection
```{r}
# split df
si_dc.ls <- split(si_dc.df, si_dc.df$id)

# get area
si_dc.area <- cbind.data.frame(area = as.numeric(lapply(si_dc.ls, cha)), id_alt = names(lapply(si_dc.ls, cha)))

# join with fitness
si_fitness.df2 <- si_fitness.df %>% 
  select(id, fitness = value) %>% 
  distinct(id, fitness) %>% 
  left_join(ez_label, by = c("id" = "id_si"))

si_dc.area <- si_dc.area %>% 
  left_join(si_fitness.df2, by = c("id_alt" = "id"))
```

# dual cue
```{r}
# split
dual.dc <- read_parquet(here("data/disease_curve/dual_dc.parquet"))

# get area
dual_dc.area <- cbind.data.frame(area = as.numeric(lapply(dual_dc.ls, cha)), id_alt = names(lapply(dual_dc.ls, cha)))

# get R log+I log area: 293632850541
dual_dc.area %>% filter(id_alt == "R log + I log")
```

# coinfection
```{r}
# split
ci_dc.ls <- split(ci_dc.df, ci_dc.df$label)

# run function to find area
ci_dc.area <- cbind.data.frame(area = as.numeric(lapply(ci_dc.ls, cha)), id_alt = names(lapply(ci_dc.ls, cha)))

# join with fitness
ci_dc.area <- ci_dc.area %>% 
  left_join(select(ci_fitness.df, label, fitness = value), by = c("id_alt" = "label"))
ci_dc.area
```

#------ get fitted scatter plot for all single infection, co infection, and dual cue --------#
```{r}
#  combine single and dual cue
si_dual.area <- select(si_dc.area, ez_label_si, area, fitness) %>% rbind(data.frame(ez_label_si = "RBC log10 +\nasexual iRBC log10", area = 293632850541, fitness = 9.851735))

si_dual.area$area <- as.numeric(si_dual.area$area)
si_dual.area$fitness <- as.numeric(si_dual.area$fitness)

# plot
library("ggpmisc")

si_area.pl <- ggplot(data = si_dual.area, aes(x = area, y = fitness)) +
  geom_point() +
  stat_poly_line(color = "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness") +
  theme_bw()

ci_area.pl <- ggplot(data = ci.area, aes(x = area, y = value)) +
  geom_point() +
  stat_poly_line(color = "black") +
  stat_poly_eq() +
  labs(x = "Area", y = "Fitness", color = "Status") +
  theme_bw()

```


#------- plot together with disease curve --------#
```{r}
# single infection
si_vir.pl <- ggarrange(si_dc.pl, si_area.pl, align = "v", widths = c(1, 0.5))

# co-infection
ci_vir.pl <- ggarrange(ci_dc.pl, ci_area.pl, align = "v", widths = c(1, 0.5))
```


#--------- static area comparison -------------#
# compute area
```{r}
# import in dc dynamic and fitness
static_dc.df <- read_parquet(here("data/disease_curve/static_dc.parquet"))
static_fitness.df <- read.csv(here("data/ci_static.csv"))

# get winner and loser
static_dc.df4 <- static_dc.df %>% 
  left_join(select(static_fitness.df, id_1, id_2, fitness_difference), by = c("id_1", "id_2")) %>%
  filter(id_1 != id_2) %>% 
  mutate(
  total_winner = case_when(
    fitness_difference > 0 ~ total1,
    fitness_difference< 0 ~ total2
  ),
  total_loser = case_when(
    fitness_difference > 0 ~ total2,
    fitness_difference< 0 ~ total1
  ))%>% 
  filter(abs(fitness_difference) > 0.1)

# split by winner and loser
static_dc.ls1 <- split(select(static_dc.df4, R, total = total_winner), static_dc.df4$id_alt)
static_dc.ls2 <- split(select(static_dc.df4, R, total = total_loser), static_dc.df4$id_alt)

# get area
static_win.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls1, cha)), status = "Winner")
static_loser.area <- cbind.data.frame(area = as.numeric(lapply(static_dc.ls2, cha)), status = "Loser")

# pair
static.area <- cbind(select(static_win.area, Winner = area),
                     select(static_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
static.area 
```

# plot static
```{r}
static.area
static_area.pl <- ggpaired(static.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
  labs(x = "Status", y = "Area", color = "Comparison\n(Static)") +
  scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = 0) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```


#--------- invasion area comparison -----------------#
# get area
```{r}
# import in dc dynamic and fitness
invasion_dc.df <- read_parquet(here("data/disease_curve/invasion_dc.parquet"))
invasion_fitness.df <- read.csv(here("data/ci_invasion.csv"))

# calculate area
invasion_dc.df4 <- invasion_dc.df %>% 
  left_join(invasion_fitness.df, by = c("mut_id", "res_id")) %>% 
  mutate(
  total_winner = case_when(
    fitness> 0 ~ total1,
    fitness< 0 ~ total2
  ),
  total_loser = case_when(
    fitness > 0 ~ total2,
    fitness < 0 ~ total1
  )) %>% 
  filter(abs(fitness) > 0.1)

invasion_dc.df4 %>% distinct(id_alt)

# split by winner and loser
invasion_dc.ls1 <- split(select(invasion_dc.df4, R, total = total_winner), invasion_dc.df4$id_alt)
invasion_dc.ls2 <- split(select(invasion_dc.df4, R, total = total_loser), invasion_dc.df4$id_alt)

# get area
invasion_win.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls1, cha)), status = "Winner")
invasion_loser.area <- cbind.data.frame(area = as.numeric(lapply(invasion_dc.ls2, cha)), status = "Loser")

# pair
invasion.area <- cbind(select(invasion_win.area, Winner = area),
                     select(invasion_loser.area, Loser = area)) %>% 
  mutate(classification = ifelse(Winner>Loser, "Winner larger area", "Loser larger area"))
invasion.area
```

# plot
```{r}
invasion_area.pl <-ggpaired(invasion.area, cond1 = "Winner", cond2 = "Loser", line.color = "classification", alpha = 0.2) +
  labs(x = "Status", y = "Area", color = "Comparison\n(Invasive)") +
  scale_color_manual(values = c("Loser larger area" = "#fc8d59", "Winner larger area" = "#4575b4")) +
  stat_compare_means(paired = TRUE, hjust = 0) +
  guides(color=guide_legend(nrow=2,byrow=TRUE))
```

#------ plot together -------------#
```{r}
# disease curves
dc.pl <- ggarrange(si_vir.pl, ci_vir.pl, ncol = 1, nrow = 2, align = "v", labels = c("A", "B"))

# pairwise comparison for static and invasive comeptition
heterocue_comp.pl <- ggarrange(static_area.pl, invasion_area.pl, ncol = 1, nrow = 2, align = "v", labels = c("C", "D"))

# join inthe other disease curves
ggarrange(dc.pl, heterocue_comp.pl, ncol = 2, nrow = 1, widths= c(1, 0.5))

ggsave(here("figures/plos-bio/virulence.tiff"), units = "px", width = 2300, height = 2000, scale = 1.5, dpi=300, bg = "white")
```


#====================================#
# dual cue dynamics figure
#===================================#

# get dual dynamics
```{r}
dual.dyn <- chabaudi_si_clean(
  parameters_cr = c(4.446192033,	10.97518275,	1.38762817,	23.3059254,	-3.452052371,	-18.0070692,	39.66614226,	-3.545193141,	18.78350799),
  immunity = "tsukushi",
  parameters = parameters_tsukushi,
  time_range = seq(0, 30, by = 1e-3),
  cue_range =  seq(6, 7, by = 1/500),
  cue_range_b = seq(0, log10(6*(10^6)), by = (log10(6*(10^6)))/500),
  cue = "R",
  cue_b = "I",
  log_cue = "log10",
  log_cue_b = "log10",
  solver = "vode",
  dyn = T
)

# filter out relevent dataframes
dual.dyn_f <- dual.dyn %>% 
  filter(variable %in% c("I", "Ig", "G", "R", "N", "W"))

# cr only
dual.dyn_cr <- dual.dyn %>% filter(variable == "cr")
```

# plot
```{r}
dual_I.plt <- ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "I"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "I" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Asexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_Ig.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "Ig"), aes(x = time, y = value/(10^5)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "Ig" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^5)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^5, name="Sexual iRBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_G.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "G"), aes(x = time, y = value/(10^4)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "G" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^4)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^4, name="Gametocyte per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_R.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "R"), aes(x = time, y = value/(10^7)),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "R" & row_number() %% 1000 == 0), 
             aes(x = time, y = value/(10^7)), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*10^7, name="RBC per µL")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_N.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "N"), aes(x = time, y = value*10),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "N" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*10), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.1, name="Indiscriminate immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")

dual_W.plt <-ggplot() +
  geom_line(data = dual.dyn_f %>% filter(variable == "W"), aes(x = time, y = value*2),
            color = "#4575b4") +
  geom_point(data = dual.dyn_f %>% filter(variable == "W" & row_number() %% 1000 == 0), 
             aes(x = time, y = value*2), size = 2, color = "#4575b4") +
  geom_line(data = dual.dyn_cr, aes(x = time, y = value)) +
  scale_y_continuous(name = "Conversion rate",
                     sec.axis = sec_axis(~.*0.5, name="Targeted immunity")) +
  labs(x = "Time (days)") +
  xlim(0, 20) +
  theme_bw() +
  theme(legend.position = "none")
```

# plot together
```{r}
ggarrange(dual_I.plt, dual_Ig.plt, dual_G.plt, dual_R.plt, dual_N.plt, dual_W.plt, nrow = 3, ncol = 2, align = "hv", labels = c("A", "B", "C", "D", "E", "F"))
ggsave(here("figures/plos-bio/dual_dyn.tiff"), units = "px", width = 2250, height = 2000, scale = 1, dpi=300, bg = "white")
```

#======================================#
# Single and co-infection verification
#======================================#
# single infection
```{r}
# import in all single infection data
si_val.ls <- list.files(path = here("data/si_validation"), pattern = "*.csv", full.names = T)

si_val.df <- lapply(si_val.ls, read.csv)
si_val.df <- do.call(rbind, si_val.df)

# get max fitness from simulation. left join with si_opt
si_opt.df <- read.csv(here("data/si_opt.csv"))

# we can see that all of the randomly simulated models have a fitness value that is less than the optimized model
si_val.df2 <- select(si_val.df, V1, id) %>%
  left_join(si_opt.df, by =c("id" = "id")) %>% 
  mutate(fitness_difference = fitness_20 - V1) %>% 
  left_join(select(ez_label, id_si, ez_label_si), by = c("id" = "id_si"))
```

### Model validation
```{r}
# read in the results file
ci_val.ls <- list.files(path = here("data/ci_validation"), pattern = "*.csv", full.names = T)
ci_val.ls2 <- lapply(ci_val.ls, read.csv)
ci_val.df <- do.call(rbind, ci_val.ls2)

ci_val.df2
ci_val.df2 <- ci_val.df %>% 
  left_join(select(ez_label, label_ci, ez_label), by = c("label" = "label_ci"))
```

# plot
```{r}
si_val.plt <- ggplot(data = si_val.df2, aes(x = fitness_difference)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label_si, scales = "free", ncol = 3) +
  labs(x = "Optimized fitness - random fitness", y = "Frequency") +
  theme_bw()

ci_val.plt <- ggplot(data = ci_val.df2, aes(x = V1)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = 0, color = "#fc8d59") +
  facet_wrap(~ez_label, scales = "free", ncol = 4) +
  labs(x = "Fitness difference between\noptimized and random strain", y = "Frequency") +
  theme_bw()

ggarrange(si_val.plt, ci_val.plt, align = "hv", labels = c("A", "B"), widths = c(3,4))
ggsave(here("figures/plos-bio/validation.tiff"), units = "px", width = 2250, height = 1300, scale = 1.6, dpi=300, bg = "white")
```

#=========================#
# Monte carlo dynamics supplementary
#=========================#
# run code in report 16
```{r}
mc_dyn_a <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = cr)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = cr_bot, ymax = cr_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Conversion rate") +
  theme_bw()
  
# plot fitness timeseries. When if tiness lost? At the latter part
mc_dyn_b  <- ggplot() +
  geom_line(data = reference.df, aes(x = time, y = tau)) +
  geom_ribbon(data = diff.df, aes(x = time, ymin = tau_bot, ymax = tau_top), alpha = 0.5, fill = "#fc8d59") +
  facet_wrap(~cue, ncol = 2) +
  labs(x = "Time (days)", y = "Transmission potential") +
  theme_bw()

ggarrange(mc_dyn_a, mc_dyn_b, ncol = 1, align = "v", labels = c("A", "B"))
ggsave(here("figures/plos-bio/MC_dyn.tiff"), units = "px", width = 2000, height = 2600, scale = 1, dpi=300, bg = "white")
```



